readahead: clean up and simplify the code for filemap page fault readahead
This shouldn't really change behavior all that much, but the single rather complex function, with read-ahead inside a loop etc., is broken up into more manageable pieces. The behavior is also less subtle, with the read-ahead being done up-front rather than inside some subtle loop, thus avoiding the now unnecessary extra state variables (i.e. "did_readaround" is gone). Fengguang: the code split in fact fixed a bug reported by Pavel Levshin: the PGMAJFAULT accounting used to be bypassed when MADV_RANDOM was set, in which case the original code would jump directly to the no_cached_page read path. Cc: Pavel Levshin <lpk@581.spb.su> Cc: <wli@movementarian.org> Cc: Nick Piggin <npiggin@suse.de> Signed-off-by: Wu Fengguang <fengguang.wu@intel.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
51daa88ebd
commit
ef00e08e26
1 changed files with 89 additions and 67 deletions
156
mm/filemap.c
156
mm/filemap.c
|
@ -1456,6 +1456,68 @@ static int page_cache_read(struct file *file, pgoff_t offset)
|
|||
|
||||
/*
 * Miss-count threshold: once ra->mmap_miss exceeds this value,
 * do_sync_mmap_readahead() stops issuing read-around for the file.
 */
#define MMAP_LOTSAMISS (100)
|
||||
|
||||
/*
|
||||
* Synchronous readahead happens when we don't even find
|
||||
* a page in the page cache at all.
|
||||
*/
|
||||
static void do_sync_mmap_readahead(struct vm_area_struct *vma,
|
||||
struct file_ra_state *ra,
|
||||
struct file *file,
|
||||
pgoff_t offset)
|
||||
{
|
||||
unsigned long ra_pages;
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
|
||||
/* If we don't want any read-ahead, don't bother */
|
||||
if (VM_RandomReadHint(vma))
|
||||
return;
|
||||
|
||||
if (VM_SequentialReadHint(vma)) {
|
||||
page_cache_sync_readahead(mapping, ra, file, offset, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
if (ra->mmap_miss < INT_MAX)
|
||||
ra->mmap_miss++;
|
||||
|
||||
/*
|
||||
* Do we miss much more than hit in this file? If so,
|
||||
* stop bothering with read-ahead. It will only hurt.
|
||||
*/
|
||||
if (ra->mmap_miss > MMAP_LOTSAMISS)
|
||||
return;
|
||||
|
||||
ra_pages = max_sane_readahead(ra->ra_pages);
|
||||
if (ra_pages) {
|
||||
pgoff_t start = 0;
|
||||
|
||||
if (offset > ra_pages / 2)
|
||||
start = offset - ra_pages / 2;
|
||||
do_page_cache_readahead(mapping, file, start, ra_pages);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Asynchronous readahead happens when we find the page and PG_readahead,
|
||||
* so we want to possibly extend the readahead further..
|
||||
*/
|
||||
static void do_async_mmap_readahead(struct vm_area_struct *vma,
|
||||
struct file_ra_state *ra,
|
||||
struct file *file,
|
||||
struct page *page,
|
||||
pgoff_t offset)
|
||||
{
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
|
||||
/* If we don't want any read-ahead, don't bother */
|
||||
if (VM_RandomReadHint(vma))
|
||||
return;
|
||||
if (ra->mmap_miss > 0)
|
||||
ra->mmap_miss--;
|
||||
if (PageReadahead(page))
|
||||
page_cache_async_readahead(mapping, ra, file, page, offset, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* filemap_fault - read in file data for page fault handling
|
||||
* @vma: vma in which the fault was taken
|
||||
|
@ -1475,78 +1537,44 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
|||
struct address_space *mapping = file->f_mapping;
|
||||
struct file_ra_state *ra = &file->f_ra;
|
||||
struct inode *inode = mapping->host;
|
||||
pgoff_t offset = vmf->pgoff;
|
||||
struct page *page;
|
||||
pgoff_t size;
|
||||
int did_readaround = 0;
|
||||
int ret = 0;
|
||||
|
||||
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
|
||||
if (vmf->pgoff >= size)
|
||||
if (offset >= size)
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
/* If we don't want any read-ahead, don't bother */
|
||||
if (VM_RandomReadHint(vma))
|
||||
goto no_cached_page;
|
||||
|
||||
/*
|
||||
* Do we have something in the page cache already?
|
||||
*/
|
||||
retry_find:
|
||||
page = find_lock_page(mapping, vmf->pgoff);
|
||||
/*
|
||||
* For sequential accesses, we use the generic readahead logic.
|
||||
*/
|
||||
if (VM_SequentialReadHint(vma)) {
|
||||
if (!page) {
|
||||
page_cache_sync_readahead(mapping, ra, file,
|
||||
vmf->pgoff, 1);
|
||||
page = find_lock_page(mapping, vmf->pgoff);
|
||||
if (!page)
|
||||
goto no_cached_page;
|
||||
}
|
||||
if (PageReadahead(page)) {
|
||||
page_cache_async_readahead(mapping, ra, file, page,
|
||||
vmf->pgoff, 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (!page) {
|
||||
unsigned long ra_pages;
|
||||
|
||||
ra->mmap_miss++;
|
||||
|
||||
page = find_get_page(mapping, offset);
|
||||
if (likely(page)) {
|
||||
/*
|
||||
* Do we miss much more than hit in this file? If so,
|
||||
* stop bothering with read-ahead. It will only hurt.
|
||||
* We found the page, so try async readahead before
|
||||
* waiting for the lock.
|
||||
*/
|
||||
if (ra->mmap_miss > MMAP_LOTSAMISS)
|
||||
do_async_mmap_readahead(vma, ra, file, page, offset);
|
||||
lock_page(page);
|
||||
|
||||
/* Did it get truncated? */
|
||||
if (unlikely(page->mapping != mapping)) {
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
goto no_cached_page;
|
||||
|
||||
/*
|
||||
* To keep the pgmajfault counter straight, we need to
|
||||
* check did_readaround, as this is an inner loop.
|
||||
*/
|
||||
if (!did_readaround) {
|
||||
ret = VM_FAULT_MAJOR;
|
||||
count_vm_event(PGMAJFAULT);
|
||||
}
|
||||
did_readaround = 1;
|
||||
ra_pages = max_sane_readahead(file->f_ra.ra_pages);
|
||||
if (ra_pages) {
|
||||
pgoff_t start = 0;
|
||||
|
||||
if (vmf->pgoff > ra_pages / 2)
|
||||
start = vmf->pgoff - ra_pages / 2;
|
||||
do_page_cache_readahead(mapping, file, start, ra_pages);
|
||||
}
|
||||
page = find_lock_page(mapping, vmf->pgoff);
|
||||
} else {
|
||||
/* No page in the page cache at all */
|
||||
do_sync_mmap_readahead(vma, ra, file, offset);
|
||||
count_vm_event(PGMAJFAULT);
|
||||
ret = VM_FAULT_MAJOR;
|
||||
retry_find:
|
||||
page = find_lock_page(mapping, offset);
|
||||
if (!page)
|
||||
goto no_cached_page;
|
||||
}
|
||||
|
||||
if (!did_readaround)
|
||||
ra->mmap_miss--;
|
||||
|
||||
/*
|
||||
* We have a locked page in the page cache, now we need to check
|
||||
* that it's up-to-date. If not, it is going to be due to an error.
|
||||
|
@ -1554,18 +1582,18 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
|||
if (unlikely(!PageUptodate(page)))
|
||||
goto page_not_uptodate;
|
||||
|
||||
/* Must recheck i_size under page lock */
|
||||
/*
|
||||
* Found the page and have a reference on it.
|
||||
* We must recheck i_size under page lock.
|
||||
*/
|
||||
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
|
||||
if (unlikely(vmf->pgoff >= size)) {
|
||||
if (unlikely(offset >= size)) {
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Found the page and have a reference on it.
|
||||
*/
|
||||
ra->prev_pos = (loff_t)page->index << PAGE_CACHE_SHIFT;
|
||||
ra->prev_pos = (loff_t)offset << PAGE_CACHE_SHIFT;
|
||||
vmf->page = page;
|
||||
return ret | VM_FAULT_LOCKED;
|
||||
|
||||
|
@ -1574,7 +1602,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
|||
* We're only likely to ever get here if MADV_RANDOM is in
|
||||
* effect.
|
||||
*/
|
||||
error = page_cache_read(file, vmf->pgoff);
|
||||
error = page_cache_read(file, offset);
|
||||
|
||||
/*
|
||||
* The page we want has now been added to the page cache.
|
||||
|
@ -1594,12 +1622,6 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
|||
return VM_FAULT_SIGBUS;
|
||||
|
||||
page_not_uptodate:
|
||||
/* IO error path */
|
||||
if (!did_readaround) {
|
||||
ret = VM_FAULT_MAJOR;
|
||||
count_vm_event(PGMAJFAULT);
|
||||
}
|
||||
|
||||
/*
|
||||
* Umm, take care of errors if the page isn't up-to-date.
|
||||
* Try to re-read it _once_. We do this synchronously,
|
||||
|
|
Loading…
Reference in a new issue