Btrfs: do aio_write instead of write
In order for AIO to work, we need to implement aio_write. This patch converts our btrfs_file_write to btrfs_file_aio_write. I've tested this with xfstests and nothing broke, and the AIO stuff magically started working. Thanks,
Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
parent
4b46fce233
commit
11c65dccf7
2 changed files with 105 additions and 84 deletions
|
@ -2017,6 +2017,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
|
|||
sector_t sector;
|
||||
struct extent_map *em;
|
||||
struct block_device *bdev;
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
int ret;
|
||||
int nr = 0;
|
||||
size_t page_offset = 0;
|
||||
|
@ -2028,7 +2029,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
|
|||
set_page_extent_mapped(page);
|
||||
|
||||
end = page_end;
|
||||
lock_extent(tree, start, end, GFP_NOFS);
|
||||
while (1) {
|
||||
lock_extent(tree, start, end, GFP_NOFS);
|
||||
ordered = btrfs_lookup_ordered_extent(inode, start);
|
||||
if (!ordered)
|
||||
break;
|
||||
unlock_extent(tree, start, end, GFP_NOFS);
|
||||
btrfs_start_ordered_extent(inode, ordered, 1);
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
}
|
||||
|
||||
if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
|
||||
char *userpage;
|
||||
|
|
178
fs/btrfs/file.c
178
fs/btrfs/file.c
|
@ -46,32 +46,42 @@
|
|||
static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
|
||||
int write_bytes,
|
||||
struct page **prepared_pages,
|
||||
const char __user *buf)
|
||||
struct iov_iter *i)
|
||||
{
|
||||
long page_fault = 0;
|
||||
int i;
|
||||
size_t copied;
|
||||
int pg = 0;
|
||||
int offset = pos & (PAGE_CACHE_SIZE - 1);
|
||||
|
||||
for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
|
||||
while (write_bytes > 0) {
|
||||
size_t count = min_t(size_t,
|
||||
PAGE_CACHE_SIZE - offset, write_bytes);
|
||||
struct page *page = prepared_pages[i];
|
||||
fault_in_pages_readable(buf, count);
|
||||
struct page *page = prepared_pages[pg];
|
||||
again:
|
||||
if (unlikely(iov_iter_fault_in_readable(i, count)))
|
||||
return -EFAULT;
|
||||
|
||||
/* Copy data from userspace to the current page */
|
||||
kmap(page);
|
||||
page_fault = __copy_from_user(page_address(page) + offset,
|
||||
buf, count);
|
||||
copied = iov_iter_copy_from_user(page, i, offset, count);
|
||||
|
||||
/* Flush processor's dcache for this page */
|
||||
flush_dcache_page(page);
|
||||
kunmap(page);
|
||||
buf += count;
|
||||
write_bytes -= count;
|
||||
iov_iter_advance(i, copied);
|
||||
write_bytes -= copied;
|
||||
|
||||
if (page_fault)
|
||||
break;
|
||||
if (unlikely(copied == 0)) {
|
||||
count = min_t(size_t, PAGE_CACHE_SIZE - offset,
|
||||
iov_iter_single_seg_count(i));
|
||||
goto again;
|
||||
}
|
||||
|
||||
if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
|
||||
offset += copied;
|
||||
} else {
|
||||
pg++;
|
||||
offset = 0;
|
||||
}
|
||||
}
|
||||
return page_fault ? -EFAULT : 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -822,60 +832,24 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Copied from read-write.c */
|
||||
static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
|
||||
static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
|
||||
const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos)
|
||||
{
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
if (!kiocbIsKicked(iocb))
|
||||
schedule();
|
||||
else
|
||||
kiocbClearKicked(iocb);
|
||||
__set_current_state(TASK_RUNNING);
|
||||
}
|
||||
|
||||
/*
|
||||
* Just a copy of what do_sync_write does.
|
||||
*/
|
||||
static ssize_t __btrfs_direct_write(struct file *file, const char __user *buf,
|
||||
size_t count, loff_t pos, loff_t *ppos)
|
||||
{
|
||||
struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
|
||||
unsigned long nr_segs = 1;
|
||||
struct kiocb kiocb;
|
||||
ssize_t ret;
|
||||
|
||||
init_sync_kiocb(&kiocb, file);
|
||||
kiocb.ki_pos = pos;
|
||||
kiocb.ki_left = count;
|
||||
kiocb.ki_nbytes = count;
|
||||
|
||||
while (1) {
|
||||
ret = generic_file_direct_write(&kiocb, &iov, &nr_segs, pos,
|
||||
ppos, count, count);
|
||||
if (ret != -EIOCBRETRY)
|
||||
break;
|
||||
wait_on_retry_sync_kiocb(&kiocb);
|
||||
}
|
||||
|
||||
if (ret == -EIOCBQUEUED)
|
||||
ret = wait_on_sync_kiocb(&kiocb);
|
||||
*ppos = kiocb.ki_pos;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
loff_t pos;
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = fdentry(file)->d_inode;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct page *pinned[2];
|
||||
struct page **pages = NULL;
|
||||
struct iov_iter i;
|
||||
loff_t *ppos = &iocb->ki_pos;
|
||||
loff_t start_pos;
|
||||
ssize_t num_written = 0;
|
||||
ssize_t err = 0;
|
||||
size_t count;
|
||||
size_t ocount;
|
||||
int ret = 0;
|
||||
struct inode *inode = fdentry(file)->d_inode;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct page **pages = NULL;
|
||||
int nrptrs;
|
||||
struct page *pinned[2];
|
||||
unsigned long first_index;
|
||||
unsigned long last_index;
|
||||
int will_write;
|
||||
|
@ -887,13 +861,17 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
|
|||
pinned[0] = NULL;
|
||||
pinned[1] = NULL;
|
||||
|
||||
pos = *ppos;
|
||||
start_pos = pos;
|
||||
|
||||
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
|
||||
err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
|
||||
if (err)
|
||||
goto out;
|
||||
count = ocount;
|
||||
|
||||
current->backing_dev_info = inode->i_mapping->backing_dev_info;
|
||||
err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
|
||||
if (err)
|
||||
|
@ -910,33 +888,69 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
|
|||
BTRFS_I(inode)->sequence++;
|
||||
|
||||
if (unlikely(file->f_flags & O_DIRECT)) {
|
||||
num_written = __btrfs_direct_write(file, buf, count, pos,
|
||||
ppos);
|
||||
pos += num_written;
|
||||
count -= num_written;
|
||||
|
||||
/* We've written everything we wanted to, exit */
|
||||
if (num_written < 0 || !count)
|
||||
ret = btrfs_delalloc_reserve_space(inode, count);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
num_written = generic_file_direct_write(iocb, iov, &nr_segs,
|
||||
pos, ppos, count,
|
||||
ocount);
|
||||
|
||||
/*
|
||||
* the generic O_DIRECT will update in-memory i_size after the
|
||||
* DIOs are done. But our endio handlers that update the on
|
||||
* disk i_size never update past the in memory i_size. So we
|
||||
* need one more update here to catch any additions to the
|
||||
* file
|
||||
*/
|
||||
if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
|
||||
btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
|
||||
mark_inode_dirty(inode);
|
||||
}
|
||||
|
||||
if (num_written < 0) {
|
||||
if (num_written != -EIOCBQUEUED) {
|
||||
/*
|
||||
* aio land will take care of releasing the
|
||||
* delalloc
|
||||
*/
|
||||
btrfs_delalloc_release_space(inode, count);
|
||||
}
|
||||
ret = num_written;
|
||||
num_written = 0;
|
||||
goto out;
|
||||
} else if (num_written == count) {
|
||||
/* pick up pos changes done by the generic code */
|
||||
pos = *ppos;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* the buffered IO will reserve bytes for the rest of the
|
||||
* range, don't double count them here
|
||||
*/
|
||||
btrfs_delalloc_release_space(inode, count - num_written);
|
||||
|
||||
/*
|
||||
* We are going to do buffered for the rest of the range, so we
|
||||
* need to make sure to invalidate the buffered pages when we're
|
||||
* done.
|
||||
*/
|
||||
buffered = 1;
|
||||
buf += num_written;
|
||||
pos += num_written;
|
||||
}
|
||||
|
||||
nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
|
||||
PAGE_CACHE_SIZE / (sizeof(struct page *)));
|
||||
iov_iter_init(&i, iov, nr_segs, count, num_written);
|
||||
nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) /
|
||||
PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
|
||||
(sizeof(struct page *)));
|
||||
pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
|
||||
|
||||
/* generic_write_checks can change our pos */
|
||||
start_pos = pos;
|
||||
|
||||
first_index = pos >> PAGE_CACHE_SHIFT;
|
||||
last_index = (pos + count) >> PAGE_CACHE_SHIFT;
|
||||
last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
|
||||
|
||||
/*
|
||||
* there are lots of better ways to do this, but this code
|
||||
|
@ -953,7 +967,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
|
|||
unlock_page(pinned[0]);
|
||||
}
|
||||
}
|
||||
if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
|
||||
if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
|
||||
pinned[1] = grab_cache_page(inode->i_mapping, last_index);
|
||||
if (!PageUptodate(pinned[1])) {
|
||||
ret = btrfs_readpage(NULL, pinned[1]);
|
||||
|
@ -964,10 +978,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
|
|||
}
|
||||
}
|
||||
|
||||
while (count > 0) {
|
||||
while (iov_iter_count(&i) > 0) {
|
||||
size_t offset = pos & (PAGE_CACHE_SIZE - 1);
|
||||
size_t write_bytes = min(count, nrptrs *
|
||||
(size_t)PAGE_CACHE_SIZE -
|
||||
size_t write_bytes = min(iov_iter_count(&i),
|
||||
nrptrs * (size_t)PAGE_CACHE_SIZE -
|
||||
offset);
|
||||
size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
|
||||
PAGE_CACHE_SHIFT;
|
||||
|
@ -988,7 +1002,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
|
|||
}
|
||||
|
||||
ret = btrfs_copy_from_user(pos, num_pages,
|
||||
write_bytes, pages, buf);
|
||||
write_bytes, pages, &i);
|
||||
if (ret == 0) {
|
||||
dirty_and_release_pages(NULL, root, file, pages,
|
||||
num_pages, pos, write_bytes);
|
||||
|
@ -1012,8 +1026,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
|
|||
btrfs_throttle(root);
|
||||
}
|
||||
|
||||
buf += write_bytes;
|
||||
count -= write_bytes;
|
||||
pos += write_bytes;
|
||||
num_written += write_bytes;
|
||||
|
||||
|
@ -1206,7 +1218,7 @@ const struct file_operations btrfs_file_operations = {
|
|||
.read = do_sync_read,
|
||||
.aio_read = generic_file_aio_read,
|
||||
.splice_read = generic_file_splice_read,
|
||||
.write = btrfs_file_write,
|
||||
.aio_write = btrfs_file_aio_write,
|
||||
.mmap = btrfs_file_mmap,
|
||||
.open = generic_file_open,
|
||||
.release = btrfs_release_file,
|
||||
|
|
Loading…
Reference in a new issue