hugetlbfs: avoid taking i_mutex from hugetlbfs_read()
Taking i_mutex in hugetlbfs_read() can result in deadlock with mmap as explained below Thread A: read() on hugetlbfs hugetlbfs_read() called i_mutex grabbed hugetlbfs_read_actor() called __copy_to_user() called page fault is triggered Thread B, sharing address space with A: mmap() the same file ->mmap_sem is grabbed on task_B->mm->mmap_sem hugetlbfs_file_mmap() is called attempt to grab ->i_mutex and block waiting for A to give it up Thread A: pagefault handled blocked on attempt to grab task_A->mm->mmap_sem, which happens to be the same thing as task_B->mm->mmap_sem. Block waiting for B to give it up. AFAIU the i_mutex locking was added to hugetlbfs_read() as per http://lkml.indiana.edu/hypermail/linux/kernel/0707.2/3066.html to take care of the race between truncate and read. This patch fixes this by looking at page->mapping under lock_page() (find_lock_page()) to ensure that the inode didn't get truncated in the range during a parallel read. Ideally we can extend the patch to make sure we don't increase i_size in mmap. But that will break userspace, because applications will now have to use truncate(2) to increase i_size in hugetlbfs. Based on the original patch from Hillf Danton. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Cc: Hillf Danton <dhillf@gmail.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Hugh Dickins <hughd@google.com> Cc: <stable@kernel.org> [everything after 2007 :)] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
f5bf18fa22
commit
a05b0855fd
1 changed files with 9 additions and 16 deletions
|
@ -245,17 +245,10 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
|
||||||
loff_t isize;
|
loff_t isize;
|
||||||
ssize_t retval = 0;
|
ssize_t retval = 0;
|
||||||
|
|
||||||
mutex_lock(&inode->i_mutex);
|
|
||||||
|
|
||||||
/* validate length */
|
/* validate length */
|
||||||
if (len == 0)
|
if (len == 0)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
isize = i_size_read(inode);
|
|
||||||
if (!isize)
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
end_index = (isize - 1) >> huge_page_shift(h);
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
struct page *page;
|
struct page *page;
|
||||||
unsigned long nr, ret;
|
unsigned long nr, ret;
|
||||||
|
@ -263,18 +256,21 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
|
||||||
|
|
||||||
/* nr is the maximum number of bytes to copy from this page */
|
/* nr is the maximum number of bytes to copy from this page */
|
||||||
nr = huge_page_size(h);
|
nr = huge_page_size(h);
|
||||||
|
isize = i_size_read(inode);
|
||||||
|
if (!isize)
|
||||||
|
goto out;
|
||||||
|
end_index = (isize - 1) >> huge_page_shift(h);
|
||||||
if (index >= end_index) {
|
if (index >= end_index) {
|
||||||
if (index > end_index)
|
if (index > end_index)
|
||||||
goto out;
|
goto out;
|
||||||
nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
|
nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
|
||||||
if (nr <= offset) {
|
if (nr <= offset)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
nr = nr - offset;
|
nr = nr - offset;
|
||||||
|
|
||||||
/* Find the page */
|
/* Find the page */
|
||||||
page = find_get_page(mapping, index);
|
page = find_lock_page(mapping, index);
|
||||||
if (unlikely(page == NULL)) {
|
if (unlikely(page == NULL)) {
|
||||||
/*
|
/*
|
||||||
* We have a HOLE, zero out the user-buffer for the
|
* We have a HOLE, zero out the user-buffer for the
|
||||||
|
@ -286,17 +282,18 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
|
||||||
else
|
else
|
||||||
ra = 0;
|
ra = 0;
|
||||||
} else {
|
} else {
|
||||||
|
unlock_page(page);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We have the page, copy it to user space buffer.
|
* We have the page, copy it to user space buffer.
|
||||||
*/
|
*/
|
||||||
ra = hugetlbfs_read_actor(page, offset, buf, len, nr);
|
ra = hugetlbfs_read_actor(page, offset, buf, len, nr);
|
||||||
ret = ra;
|
ret = ra;
|
||||||
|
page_cache_release(page);
|
||||||
}
|
}
|
||||||
if (ra < 0) {
|
if (ra < 0) {
|
||||||
if (retval == 0)
|
if (retval == 0)
|
||||||
retval = ra;
|
retval = ra;
|
||||||
if (page)
|
|
||||||
page_cache_release(page);
|
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -306,16 +303,12 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
|
||||||
index += offset >> huge_page_shift(h);
|
index += offset >> huge_page_shift(h);
|
||||||
offset &= ~huge_page_mask(h);
|
offset &= ~huge_page_mask(h);
|
||||||
|
|
||||||
if (page)
|
|
||||||
page_cache_release(page);
|
|
||||||
|
|
||||||
/* short read or no more work */
|
/* short read or no more work */
|
||||||
if ((ret != nr) || (len == 0))
|
if ((ret != nr) || (len == 0))
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
*ppos = ((loff_t)index << huge_page_shift(h)) + offset;
|
*ppos = ((loff_t)index << huge_page_shift(h)) + offset;
|
||||||
mutex_unlock(&inode->i_mutex);
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue