[PATCH] add AOP_TRUNCATED_PAGE, prepend AOP_ to WRITEPAGE_ACTIVATE
readpage(), prepare_write(), and commit_write() callers are updated to understand the special return code AOP_TRUNCATED_PAGE in the style of writepage() and WRITEPAGE_ACTIVATE. AOP_TRUNCATED_PAGE tells the caller that the callee has unlocked the page and that the operation should be tried again with a new page. OCFS2 uses this to detect and work around a lock inversion in its aop methods. There should be no change in behaviour for methods that don't return AOP_TRUNCATED_PAGE. WRITEPAGE_ACTIVATE is also prepended with AOP_ for consistency and they are made enums so that kerneldoc can be used to document their semantics. Signed-off-by: Zach Brown <zach.brown@oracle.com>
This commit is contained in:
parent
7063fbf226
commit
994fc28c7b
9 changed files with 113 additions and 45 deletions
|
@ -213,7 +213,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
|
||||||
struct address_space_operations *aops = mapping->a_ops;
|
struct address_space_operations *aops = mapping->a_ops;
|
||||||
pgoff_t index;
|
pgoff_t index;
|
||||||
unsigned offset, bv_offs;
|
unsigned offset, bv_offs;
|
||||||
int len, ret = 0;
|
int len, ret;
|
||||||
|
|
||||||
down(&mapping->host->i_sem);
|
down(&mapping->host->i_sem);
|
||||||
index = pos >> PAGE_CACHE_SHIFT;
|
index = pos >> PAGE_CACHE_SHIFT;
|
||||||
|
@ -232,9 +232,15 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
|
||||||
page = grab_cache_page(mapping, index);
|
page = grab_cache_page(mapping, index);
|
||||||
if (unlikely(!page))
|
if (unlikely(!page))
|
||||||
goto fail;
|
goto fail;
|
||||||
if (unlikely(aops->prepare_write(file, page, offset,
|
ret = aops->prepare_write(file, page, offset,
|
||||||
offset + size)))
|
offset + size);
|
||||||
|
if (unlikely(ret)) {
|
||||||
|
if (ret == AOP_TRUNCATED_PAGE) {
|
||||||
|
page_cache_release(page);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
goto unlock;
|
goto unlock;
|
||||||
|
}
|
||||||
transfer_result = lo_do_transfer(lo, WRITE, page, offset,
|
transfer_result = lo_do_transfer(lo, WRITE, page, offset,
|
||||||
bvec->bv_page, bv_offs, size, IV);
|
bvec->bv_page, bv_offs, size, IV);
|
||||||
if (unlikely(transfer_result)) {
|
if (unlikely(transfer_result)) {
|
||||||
|
@ -251,9 +257,15 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
|
||||||
kunmap_atomic(kaddr, KM_USER0);
|
kunmap_atomic(kaddr, KM_USER0);
|
||||||
}
|
}
|
||||||
flush_dcache_page(page);
|
flush_dcache_page(page);
|
||||||
if (unlikely(aops->commit_write(file, page, offset,
|
ret = aops->commit_write(file, page, offset,
|
||||||
offset + size)))
|
offset + size);
|
||||||
|
if (unlikely(ret)) {
|
||||||
|
if (ret == AOP_TRUNCATED_PAGE) {
|
||||||
|
page_cache_release(page);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
goto unlock;
|
goto unlock;
|
||||||
|
}
|
||||||
if (unlikely(transfer_result))
|
if (unlikely(transfer_result))
|
||||||
goto unlock;
|
goto unlock;
|
||||||
bv_offs += size;
|
bv_offs += size;
|
||||||
|
@ -264,6 +276,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
page_cache_release(page);
|
page_cache_release(page);
|
||||||
}
|
}
|
||||||
|
ret = 0;
|
||||||
out:
|
out:
|
||||||
up(&mapping->host->i_sem);
|
up(&mapping->host->i_sem);
|
||||||
return ret;
|
return ret;
|
||||||
|
|
|
@ -154,7 +154,7 @@ static int ramdisk_commit_write(struct file *file, struct page *page,
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ->writepage to the the blockdev's mapping has to redirty the page so that the
|
* ->writepage to the the blockdev's mapping has to redirty the page so that the
|
||||||
* VM doesn't go and steal it. We return WRITEPAGE_ACTIVATE so that the VM
|
* VM doesn't go and steal it. We return AOP_WRITEPAGE_ACTIVATE so that the VM
|
||||||
* won't try to (pointlessly) write the page again for a while.
|
* won't try to (pointlessly) write the page again for a while.
|
||||||
*
|
*
|
||||||
* Really, these pages should not be on the LRU at all.
|
* Really, these pages should not be on the LRU at all.
|
||||||
|
@ -165,7 +165,7 @@ static int ramdisk_writepage(struct page *page, struct writeback_control *wbc)
|
||||||
make_page_uptodate(page);
|
make_page_uptodate(page);
|
||||||
SetPageDirty(page);
|
SetPageDirty(page);
|
||||||
if (wbc->for_reclaim)
|
if (wbc->for_reclaim)
|
||||||
return WRITEPAGE_ACTIVATE;
|
return AOP_WRITEPAGE_ACTIVATE;
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -721,7 +721,7 @@ mpage_writepages(struct address_space *mapping,
|
||||||
&last_block_in_bio, &ret, wbc,
|
&last_block_in_bio, &ret, wbc,
|
||||||
page->mapping->a_ops->writepage);
|
page->mapping->a_ops->writepage);
|
||||||
}
|
}
|
||||||
if (unlikely(ret == WRITEPAGE_ACTIVATE))
|
if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE))
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
if (ret || (--(wbc->nr_to_write) <= 0))
|
if (ret || (--(wbc->nr_to_write) <= 0))
|
||||||
done = 1;
|
done = 1;
|
||||||
|
|
|
@ -302,6 +302,37 @@ struct iattr {
|
||||||
*/
|
*/
|
||||||
#include <linux/quota.h>
|
#include <linux/quota.h>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* enum positive_aop_returns - aop return codes with specific semantics
|
||||||
|
*
|
||||||
|
* @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
|
||||||
|
* completed, that the page is still locked, and
|
||||||
|
* should be considered active. The VM uses this hint
|
||||||
|
* to return the page to the active list -- it won't
|
||||||
|
* be a candidate for writeback again in the near
|
||||||
|
* future. Other callers must be careful to unlock
|
||||||
|
* the page if they get this return. Returned by
|
||||||
|
* writepage();
|
||||||
|
*
|
||||||
|
* @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
|
||||||
|
* unlocked it and the page might have been truncated.
|
||||||
|
* The caller should back up to acquiring a new page and
|
||||||
|
* trying again. The aop will be taking reasonable
|
||||||
|
* precautions not to livelock. If the caller held a page
|
||||||
|
* reference, it should drop it before retrying. Returned
|
||||||
|
* by readpage(), prepare_write(), and commit_write().
|
||||||
|
*
|
||||||
|
* address_space_operation functions return these large constants to indicate
|
||||||
|
* special semantics to the caller. These are much larger than the bytes in a
|
||||||
|
* page to allow for functions that return the number of bytes operated on in a
|
||||||
|
* given page.
|
||||||
|
*/
|
||||||
|
|
||||||
|
enum positive_aop_returns {
|
||||||
|
AOP_WRITEPAGE_ACTIVATE = 0x80000,
|
||||||
|
AOP_TRUNCATED_PAGE = 0x80001,
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* oh the beauties of C type declarations.
|
* oh the beauties of C type declarations.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -59,12 +59,6 @@ struct writeback_control {
|
||||||
unsigned for_reclaim:1; /* Invoked from the page allocator */
|
unsigned for_reclaim:1; /* Invoked from the page allocator */
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
|
||||||
* ->writepage() return values (make these much larger than a pagesize, in
|
|
||||||
* case some fs is returning number-of-bytes-written from writepage)
|
|
||||||
*/
|
|
||||||
#define WRITEPAGE_ACTIVATE 0x80000 /* IO was not started: activate page */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* fs/fs-writeback.c
|
* fs/fs-writeback.c
|
||||||
*/
|
*/
|
||||||
|
|
67
mm/filemap.c
67
mm/filemap.c
|
@ -831,8 +831,13 @@ void do_generic_mapping_read(struct address_space *mapping,
|
||||||
/* Start the actual read. The read will unlock the page. */
|
/* Start the actual read. The read will unlock the page. */
|
||||||
error = mapping->a_ops->readpage(filp, page);
|
error = mapping->a_ops->readpage(filp, page);
|
||||||
|
|
||||||
if (unlikely(error))
|
if (unlikely(error)) {
|
||||||
|
if (error == AOP_TRUNCATED_PAGE) {
|
||||||
|
page_cache_release(page);
|
||||||
|
goto find_page;
|
||||||
|
}
|
||||||
goto readpage_error;
|
goto readpage_error;
|
||||||
|
}
|
||||||
|
|
||||||
if (!PageUptodate(page)) {
|
if (!PageUptodate(page)) {
|
||||||
lock_page(page);
|
lock_page(page);
|
||||||
|
@ -1152,26 +1157,24 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
|
||||||
{
|
{
|
||||||
struct address_space *mapping = file->f_mapping;
|
struct address_space *mapping = file->f_mapping;
|
||||||
struct page *page;
|
struct page *page;
|
||||||
int error;
|
int ret;
|
||||||
|
|
||||||
|
do {
|
||||||
page = page_cache_alloc_cold(mapping);
|
page = page_cache_alloc_cold(mapping);
|
||||||
if (!page)
|
if (!page)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
error = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
|
ret = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
|
||||||
if (!error) {
|
if (ret == 0)
|
||||||
error = mapping->a_ops->readpage(file, page);
|
ret = mapping->a_ops->readpage(file, page);
|
||||||
page_cache_release(page);
|
else if (ret == -EEXIST)
|
||||||
return error;
|
ret = 0; /* losing race to add is OK */
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We arrive here in the unlikely event that someone
|
|
||||||
* raced with us and added our page to the cache first
|
|
||||||
* or we are out of memory for radix-tree nodes.
|
|
||||||
*/
|
|
||||||
page_cache_release(page);
|
page_cache_release(page);
|
||||||
return error == -EEXIST ? 0 : error;
|
|
||||||
|
} while (ret == AOP_TRUNCATED_PAGE);
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define MMAP_LOTSAMISS (100)
|
#define MMAP_LOTSAMISS (100)
|
||||||
|
@ -1331,10 +1334,14 @@ struct page *filemap_nopage(struct vm_area_struct *area,
|
||||||
goto success;
|
goto success;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!mapping->a_ops->readpage(file, page)) {
|
error = mapping->a_ops->readpage(file, page);
|
||||||
|
if (!error) {
|
||||||
wait_on_page_locked(page);
|
wait_on_page_locked(page);
|
||||||
if (PageUptodate(page))
|
if (PageUptodate(page))
|
||||||
goto success;
|
goto success;
|
||||||
|
} else if (error == AOP_TRUNCATED_PAGE) {
|
||||||
|
page_cache_release(page);
|
||||||
|
goto retry_find;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1358,10 +1365,14 @@ struct page *filemap_nopage(struct vm_area_struct *area,
|
||||||
goto success;
|
goto success;
|
||||||
}
|
}
|
||||||
ClearPageError(page);
|
ClearPageError(page);
|
||||||
if (!mapping->a_ops->readpage(file, page)) {
|
error = mapping->a_ops->readpage(file, page);
|
||||||
|
if (!error) {
|
||||||
wait_on_page_locked(page);
|
wait_on_page_locked(page);
|
||||||
if (PageUptodate(page))
|
if (PageUptodate(page))
|
||||||
goto success;
|
goto success;
|
||||||
|
} else if (error == AOP_TRUNCATED_PAGE) {
|
||||||
|
page_cache_release(page);
|
||||||
|
goto retry_find;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1444,10 +1455,14 @@ static struct page * filemap_getpage(struct file *file, unsigned long pgoff,
|
||||||
goto success;
|
goto success;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!mapping->a_ops->readpage(file, page)) {
|
error = mapping->a_ops->readpage(file, page);
|
||||||
|
if (!error) {
|
||||||
wait_on_page_locked(page);
|
wait_on_page_locked(page);
|
||||||
if (PageUptodate(page))
|
if (PageUptodate(page))
|
||||||
goto success;
|
goto success;
|
||||||
|
} else if (error == AOP_TRUNCATED_PAGE) {
|
||||||
|
page_cache_release(page);
|
||||||
|
goto retry_find;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1470,10 +1485,14 @@ static struct page * filemap_getpage(struct file *file, unsigned long pgoff,
|
||||||
}
|
}
|
||||||
|
|
||||||
ClearPageError(page);
|
ClearPageError(page);
|
||||||
if (!mapping->a_ops->readpage(file, page)) {
|
error = mapping->a_ops->readpage(file, page);
|
||||||
|
if (!error) {
|
||||||
wait_on_page_locked(page);
|
wait_on_page_locked(page);
|
||||||
if (PageUptodate(page))
|
if (PageUptodate(page))
|
||||||
goto success;
|
goto success;
|
||||||
|
} else if (error == AOP_TRUNCATED_PAGE) {
|
||||||
|
page_cache_release(page);
|
||||||
|
goto retry_find;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1934,12 +1953,16 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
|
||||||
status = a_ops->prepare_write(file, page, offset, offset+bytes);
|
status = a_ops->prepare_write(file, page, offset, offset+bytes);
|
||||||
if (unlikely(status)) {
|
if (unlikely(status)) {
|
||||||
loff_t isize = i_size_read(inode);
|
loff_t isize = i_size_read(inode);
|
||||||
|
|
||||||
|
if (status != AOP_TRUNCATED_PAGE)
|
||||||
|
unlock_page(page);
|
||||||
|
page_cache_release(page);
|
||||||
|
if (status == AOP_TRUNCATED_PAGE)
|
||||||
|
continue;
|
||||||
/*
|
/*
|
||||||
* prepare_write() may have instantiated a few blocks
|
* prepare_write() may have instantiated a few blocks
|
||||||
* outside i_size. Trim these off again.
|
* outside i_size. Trim these off again.
|
||||||
*/
|
*/
|
||||||
unlock_page(page);
|
|
||||||
page_cache_release(page);
|
|
||||||
if (pos + bytes > isize)
|
if (pos + bytes > isize)
|
||||||
vmtruncate(inode, isize);
|
vmtruncate(inode, isize);
|
||||||
break;
|
break;
|
||||||
|
@ -1952,6 +1975,10 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
|
||||||
cur_iov, iov_base, bytes);
|
cur_iov, iov_base, bytes);
|
||||||
flush_dcache_page(page);
|
flush_dcache_page(page);
|
||||||
status = a_ops->commit_write(file, page, offset, offset+bytes);
|
status = a_ops->commit_write(file, page, offset, offset+bytes);
|
||||||
|
if (status == AOP_TRUNCATED_PAGE) {
|
||||||
|
page_cache_release(page);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (likely(copied > 0)) {
|
if (likely(copied > 0)) {
|
||||||
if (!status)
|
if (!status)
|
||||||
status = copied;
|
status = copied;
|
||||||
|
|
|
@ -158,7 +158,7 @@ static int read_pages(struct address_space *mapping, struct file *filp,
|
||||||
{
|
{
|
||||||
unsigned page_idx;
|
unsigned page_idx;
|
||||||
struct pagevec lru_pvec;
|
struct pagevec lru_pvec;
|
||||||
int ret = 0;
|
int ret;
|
||||||
|
|
||||||
if (mapping->a_ops->readpages) {
|
if (mapping->a_ops->readpages) {
|
||||||
ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
|
ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
|
||||||
|
@ -171,14 +171,17 @@ static int read_pages(struct address_space *mapping, struct file *filp,
|
||||||
list_del(&page->lru);
|
list_del(&page->lru);
|
||||||
if (!add_to_page_cache(page, mapping,
|
if (!add_to_page_cache(page, mapping,
|
||||||
page->index, GFP_KERNEL)) {
|
page->index, GFP_KERNEL)) {
|
||||||
mapping->a_ops->readpage(filp, page);
|
ret = mapping->a_ops->readpage(filp, page);
|
||||||
|
if (ret != AOP_TRUNCATED_PAGE) {
|
||||||
if (!pagevec_add(&lru_pvec, page))
|
if (!pagevec_add(&lru_pvec, page))
|
||||||
__pagevec_lru_add(&lru_pvec);
|
__pagevec_lru_add(&lru_pvec);
|
||||||
} else {
|
continue;
|
||||||
|
} /* else fall through to release */
|
||||||
|
}
|
||||||
page_cache_release(page);
|
page_cache_release(page);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
pagevec_lru_add(&lru_pvec);
|
pagevec_lru_add(&lru_pvec);
|
||||||
|
ret = 0;
|
||||||
out:
|
out:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
@ -855,7 +855,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
|
||||||
swap_free(swap);
|
swap_free(swap);
|
||||||
redirty:
|
redirty:
|
||||||
set_page_dirty(page);
|
set_page_dirty(page);
|
||||||
return WRITEPAGE_ACTIVATE; /* Return with the page locked */
|
return AOP_WRITEPAGE_ACTIVATE; /* Return with the page locked */
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_NUMA
|
#ifdef CONFIG_NUMA
|
||||||
|
|
|
@ -367,7 +367,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
|
||||||
res = mapping->a_ops->writepage(page, &wbc);
|
res = mapping->a_ops->writepage(page, &wbc);
|
||||||
if (res < 0)
|
if (res < 0)
|
||||||
handle_write_error(mapping, page, res);
|
handle_write_error(mapping, page, res);
|
||||||
if (res == WRITEPAGE_ACTIVATE) {
|
if (res == AOP_WRITEPAGE_ACTIVATE) {
|
||||||
ClearPageReclaim(page);
|
ClearPageReclaim(page);
|
||||||
return PAGE_ACTIVATE;
|
return PAGE_ACTIVATE;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue