[PATCH] direct-io: sync and invalidate file region when falling back to buffered write
When direct-io falls back to buffered write, it will just leave the dirty data floating about in pagecache, pending regular writeback. But normal direct-io semantics are that IO is synchronous, and that it leaves no pagecache behind. So change the fallback-to-buffered-write code to sync the file region and to then strip away the pagecache, just as a regular direct-io write would do. Acked-by: Jeff Moyer <jmoyer@redhat.com> Cc: Zach Brown <zach.brown@oracle.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
e8e82b76e0
commit
fb5527e68d
1 changed files with 45 additions and 6 deletions
51
mm/filemap.c
51
mm/filemap.c
|
@ -2222,7 +2222,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
|
|||
unsigned long nr_segs, loff_t *ppos)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
const struct address_space * mapping = file->f_mapping;
|
||||
struct address_space * mapping = file->f_mapping;
|
||||
size_t ocount; /* original count */
|
||||
size_t count; /* after file limit checks */
|
||||
struct inode *inode = mapping->host;
|
||||
|
@ -2275,8 +2275,11 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
|
|||
|
||||
/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
|
||||
if (unlikely(file->f_flags & O_DIRECT)) {
|
||||
written = generic_file_direct_write(iocb, iov,
|
||||
&nr_segs, pos, ppos, count, ocount);
|
||||
loff_t endbyte;
|
||||
ssize_t written_buffered;
|
||||
|
||||
written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
|
||||
ppos, count, ocount);
|
||||
if (written < 0 || written == count)
|
||||
goto out;
|
||||
/*
|
||||
|
@ -2285,10 +2288,46 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
|
|||
*/
|
||||
pos += written;
|
||||
count -= written;
|
||||
}
|
||||
written_buffered = generic_file_buffered_write(iocb, iov,
|
||||
nr_segs, pos, ppos, count,
|
||||
written);
|
||||
/*
|
||||
* If generic_file_buffered_write() retuned a synchronous error
|
||||
* then we want to return the number of bytes which were
|
||||
* direct-written, or the error code if that was zero. Note
|
||||
* that this differs from normal direct-io semantics, which
|
||||
* will return -EFOO even if some bytes were written.
|
||||
*/
|
||||
if (written_buffered < 0) {
|
||||
err = written_buffered;
|
||||
goto out;
|
||||
}
|
||||
|
||||
written = generic_file_buffered_write(iocb, iov, nr_segs,
|
||||
pos, ppos, count, written);
|
||||
/*
|
||||
* We need to ensure that the page cache pages are written to
|
||||
* disk and invalidated to preserve the expected O_DIRECT
|
||||
* semantics.
|
||||
*/
|
||||
endbyte = pos + written_buffered - written - 1;
|
||||
err = do_sync_file_range(file, pos, endbyte,
|
||||
SYNC_FILE_RANGE_WAIT_BEFORE|
|
||||
SYNC_FILE_RANGE_WRITE|
|
||||
SYNC_FILE_RANGE_WAIT_AFTER);
|
||||
if (err == 0) {
|
||||
written = written_buffered;
|
||||
invalidate_mapping_pages(mapping,
|
||||
pos >> PAGE_CACHE_SHIFT,
|
||||
endbyte >> PAGE_CACHE_SHIFT);
|
||||
} else {
|
||||
/*
|
||||
* We don't know how much we wrote, so just return
|
||||
* the number of bytes which were direct-written
|
||||
*/
|
||||
}
|
||||
} else {
|
||||
written = generic_file_buffered_write(iocb, iov, nr_segs,
|
||||
pos, ppos, count, written);
|
||||
}
|
||||
out:
|
||||
current->backing_dev_info = NULL;
|
||||
return written ? written : err;
|
||||
|
|
Loading…
Reference in a new issue