CIFS: Add asynchronous write support through kernel AIO
This patch adds support for asynchronously processing write calls passed by io_submit(). It is based on the previously introduced async context, which allows i/o responses to be processed in a separate thread and returns to the caller immediately for asynchronous calls. This improves the write performance of single-threaded applications as the i/o queue depth increases. Signed-off-by: Pavel Shilovsky <pshilov@microsoft.com> Signed-off-by: Steve French <smfrench@gmail.com>
This commit is contained in:
parent
6685c5e2d1
commit
c610c4b619
2 changed files with 141 additions and 53 deletions
|
@ -1124,6 +1124,7 @@ struct cifs_aio_ctx {
|
||||||
struct kiocb *iocb;
|
struct kiocb *iocb;
|
||||||
struct cifsFileInfo *cfile;
|
struct cifsFileInfo *cfile;
|
||||||
struct bio_vec *bv;
|
struct bio_vec *bv;
|
||||||
|
loff_t pos;
|
||||||
unsigned int npages;
|
unsigned int npages;
|
||||||
ssize_t rc;
|
ssize_t rc;
|
||||||
unsigned int len;
|
unsigned int len;
|
||||||
|
@ -1171,6 +1172,7 @@ struct cifs_writedata {
|
||||||
enum writeback_sync_modes sync_mode;
|
enum writeback_sync_modes sync_mode;
|
||||||
struct work_struct work;
|
struct work_struct work;
|
||||||
struct cifsFileInfo *cfile;
|
struct cifsFileInfo *cfile;
|
||||||
|
struct cifs_aio_ctx *ctx;
|
||||||
__u64 offset;
|
__u64 offset;
|
||||||
pid_t pid;
|
pid_t pid;
|
||||||
unsigned int bytes;
|
unsigned int bytes;
|
||||||
|
|
192
fs/cifs/file.c
192
fs/cifs/file.c
|
@ -2458,11 +2458,14 @@ cifs_uncached_writedata_release(struct kref *refcount)
|
||||||
struct cifs_writedata *wdata = container_of(refcount,
|
struct cifs_writedata *wdata = container_of(refcount,
|
||||||
struct cifs_writedata, refcount);
|
struct cifs_writedata, refcount);
|
||||||
|
|
||||||
|
kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
|
||||||
for (i = 0; i < wdata->nr_pages; i++)
|
for (i = 0; i < wdata->nr_pages; i++)
|
||||||
put_page(wdata->pages[i]);
|
put_page(wdata->pages[i]);
|
||||||
cifs_writedata_release(refcount);
|
cifs_writedata_release(refcount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
|
||||||
|
|
||||||
static void
|
static void
|
||||||
cifs_uncached_writev_complete(struct work_struct *work)
|
cifs_uncached_writev_complete(struct work_struct *work)
|
||||||
{
|
{
|
||||||
|
@ -2478,7 +2481,8 @@ cifs_uncached_writev_complete(struct work_struct *work)
|
||||||
spin_unlock(&inode->i_lock);
|
spin_unlock(&inode->i_lock);
|
||||||
|
|
||||||
complete(&wdata->done);
|
complete(&wdata->done);
|
||||||
|
collect_uncached_write_data(wdata->ctx);
|
||||||
|
/* the below call can possibly free the last ref to aio ctx */
|
||||||
kref_put(&wdata->refcount, cifs_uncached_writedata_release);
|
kref_put(&wdata->refcount, cifs_uncached_writedata_release);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2527,7 +2531,8 @@ wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
|
||||||
static int
|
static int
|
||||||
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
|
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
|
||||||
struct cifsFileInfo *open_file,
|
struct cifsFileInfo *open_file,
|
||||||
struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
|
struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
|
||||||
|
struct cifs_aio_ctx *ctx)
|
||||||
{
|
{
|
||||||
int rc = 0;
|
int rc = 0;
|
||||||
size_t cur_len;
|
size_t cur_len;
|
||||||
|
@ -2595,6 +2600,8 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
|
||||||
wdata->pagesz = PAGE_SIZE;
|
wdata->pagesz = PAGE_SIZE;
|
||||||
wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
|
wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
|
||||||
wdata->credits = credits;
|
wdata->credits = credits;
|
||||||
|
wdata->ctx = ctx;
|
||||||
|
kref_get(&ctx->refcount);
|
||||||
|
|
||||||
if (!wdata->cfile->invalidHandle ||
|
if (!wdata->cfile->invalidHandle ||
|
||||||
!(rc = cifs_reopen_file(wdata->cfile, false)))
|
!(rc = cifs_reopen_file(wdata->cfile, false)))
|
||||||
|
@ -2620,15 +2627,95 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
|
||||||
|
{
|
||||||
|
struct cifs_writedata *wdata, *tmp;
|
||||||
|
struct cifs_tcon *tcon;
|
||||||
|
struct cifs_sb_info *cifs_sb;
|
||||||
|
struct dentry *dentry = ctx->cfile->dentry;
|
||||||
|
unsigned int i;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
tcon = tlink_tcon(ctx->cfile->tlink);
|
||||||
|
cifs_sb = CIFS_SB(dentry->d_sb);
|
||||||
|
|
||||||
|
mutex_lock(&ctx->aio_mutex);
|
||||||
|
|
||||||
|
if (list_empty(&ctx->list)) {
|
||||||
|
mutex_unlock(&ctx->aio_mutex);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = ctx->rc;
|
||||||
|
/*
|
||||||
|
* Wait for and collect replies for any successful sends in order of
|
||||||
|
* increasing offset. Once an error is hit, then return without waiting
|
||||||
|
* for any more replies.
|
||||||
|
*/
|
||||||
|
restart_loop:
|
||||||
|
list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
|
||||||
|
if (!rc) {
|
||||||
|
if (!try_wait_for_completion(&wdata->done)) {
|
||||||
|
mutex_unlock(&ctx->aio_mutex);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (wdata->result)
|
||||||
|
rc = wdata->result;
|
||||||
|
else
|
||||||
|
ctx->total_len += wdata->bytes;
|
||||||
|
|
||||||
|
/* resend call if it's a retryable error */
|
||||||
|
if (rc == -EAGAIN) {
|
||||||
|
struct list_head tmp_list;
|
||||||
|
struct iov_iter tmp_from = ctx->iter;
|
||||||
|
|
||||||
|
INIT_LIST_HEAD(&tmp_list);
|
||||||
|
list_del_init(&wdata->list);
|
||||||
|
|
||||||
|
iov_iter_advance(&tmp_from,
|
||||||
|
wdata->offset - ctx->pos);
|
||||||
|
|
||||||
|
rc = cifs_write_from_iter(wdata->offset,
|
||||||
|
wdata->bytes, &tmp_from,
|
||||||
|
ctx->cfile, cifs_sb, &tmp_list,
|
||||||
|
ctx);
|
||||||
|
|
||||||
|
list_splice(&tmp_list, &ctx->list);
|
||||||
|
|
||||||
|
kref_put(&wdata->refcount,
|
||||||
|
cifs_uncached_writedata_release);
|
||||||
|
goto restart_loop;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
list_del_init(&wdata->list);
|
||||||
|
kref_put(&wdata->refcount, cifs_uncached_writedata_release);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < ctx->npages; i++)
|
||||||
|
put_page(ctx->bv[i].bv_page);
|
||||||
|
|
||||||
|
cifs_stats_bytes_written(tcon, ctx->total_len);
|
||||||
|
set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
|
||||||
|
|
||||||
|
ctx->rc = (rc == 0) ? ctx->total_len : rc;
|
||||||
|
|
||||||
|
mutex_unlock(&ctx->aio_mutex);
|
||||||
|
|
||||||
|
if (ctx->iocb && ctx->iocb->ki_complete)
|
||||||
|
ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
|
||||||
|
else
|
||||||
|
complete(&ctx->done);
|
||||||
|
}
|
||||||
|
|
||||||
ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
|
ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
|
||||||
{
|
{
|
||||||
struct file *file = iocb->ki_filp;
|
struct file *file = iocb->ki_filp;
|
||||||
ssize_t total_written = 0;
|
ssize_t total_written = 0;
|
||||||
struct cifsFileInfo *open_file;
|
struct cifsFileInfo *cfile;
|
||||||
struct cifs_tcon *tcon;
|
struct cifs_tcon *tcon;
|
||||||
struct cifs_sb_info *cifs_sb;
|
struct cifs_sb_info *cifs_sb;
|
||||||
struct cifs_writedata *wdata, *tmp;
|
struct cifs_aio_ctx *ctx;
|
||||||
struct list_head wdata_list;
|
|
||||||
struct iov_iter saved_from = *from;
|
struct iov_iter saved_from = *from;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
|
@ -2642,16 +2729,35 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
|
||||||
if (rc <= 0)
|
if (rc <= 0)
|
||||||
return rc;
|
return rc;
|
||||||
|
|
||||||
INIT_LIST_HEAD(&wdata_list);
|
|
||||||
cifs_sb = CIFS_FILE_SB(file);
|
cifs_sb = CIFS_FILE_SB(file);
|
||||||
open_file = file->private_data;
|
cfile = file->private_data;
|
||||||
tcon = tlink_tcon(open_file->tlink);
|
tcon = tlink_tcon(cfile->tlink);
|
||||||
|
|
||||||
if (!tcon->ses->server->ops->async_writev)
|
if (!tcon->ses->server->ops->async_writev)
|
||||||
return -ENOSYS;
|
return -ENOSYS;
|
||||||
|
|
||||||
rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
|
ctx = cifs_aio_ctx_alloc();
|
||||||
open_file, cifs_sb, &wdata_list);
|
if (!ctx)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
ctx->cfile = cifsFileInfo_get(cfile);
|
||||||
|
|
||||||
|
if (!is_sync_kiocb(iocb))
|
||||||
|
ctx->iocb = iocb;
|
||||||
|
|
||||||
|
ctx->pos = iocb->ki_pos;
|
||||||
|
|
||||||
|
rc = setup_aio_ctx_iter(ctx, from, WRITE);
|
||||||
|
if (rc) {
|
||||||
|
kref_put(&ctx->refcount, cifs_aio_ctx_release);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* grab a lock here due to read response handlers can access ctx */
|
||||||
|
mutex_lock(&ctx->aio_mutex);
|
||||||
|
|
||||||
|
rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
|
||||||
|
cfile, cifs_sb, &ctx->list, ctx);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If at least one write was successfully sent, then discard any rc
|
* If at least one write was successfully sent, then discard any rc
|
||||||
|
@ -2659,58 +2765,38 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
|
||||||
* we'll end up returning whatever was written. If it fails, then
|
* we'll end up returning whatever was written. If it fails, then
|
||||||
* we'll get a new rc value from that.
|
* we'll get a new rc value from that.
|
||||||
*/
|
*/
|
||||||
if (!list_empty(&wdata_list))
|
if (!list_empty(&ctx->list))
|
||||||
rc = 0;
|
rc = 0;
|
||||||
|
|
||||||
/*
|
mutex_unlock(&ctx->aio_mutex);
|
||||||
* Wait for and collect replies for any successful sends in order of
|
|
||||||
* increasing offset. Once an error is hit or we get a fatal signal
|
|
||||||
* while waiting, then return without waiting for any more replies.
|
|
||||||
*/
|
|
||||||
restart_loop:
|
|
||||||
list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
|
|
||||||
if (!rc) {
|
|
||||||
/* FIXME: freezable too? */
|
|
||||||
rc = wait_for_completion_killable(&wdata->done);
|
|
||||||
if (rc)
|
|
||||||
rc = -EINTR;
|
|
||||||
else if (wdata->result)
|
|
||||||
rc = wdata->result;
|
|
||||||
else
|
|
||||||
total_written += wdata->bytes;
|
|
||||||
|
|
||||||
/* resend call if it's a retryable error */
|
if (rc) {
|
||||||
if (rc == -EAGAIN) {
|
kref_put(&ctx->refcount, cifs_aio_ctx_release);
|
||||||
struct list_head tmp_list;
|
return rc;
|
||||||
struct iov_iter tmp_from = saved_from;
|
|
||||||
|
|
||||||
INIT_LIST_HEAD(&tmp_list);
|
|
||||||
list_del_init(&wdata->list);
|
|
||||||
|
|
||||||
iov_iter_advance(&tmp_from,
|
|
||||||
wdata->offset - iocb->ki_pos);
|
|
||||||
|
|
||||||
rc = cifs_write_from_iter(wdata->offset,
|
|
||||||
wdata->bytes, &tmp_from,
|
|
||||||
open_file, cifs_sb, &tmp_list);
|
|
||||||
|
|
||||||
list_splice(&tmp_list, &wdata_list);
|
|
||||||
|
|
||||||
kref_put(&wdata->refcount,
|
|
||||||
cifs_uncached_writedata_release);
|
|
||||||
goto restart_loop;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
list_del_init(&wdata->list);
|
|
||||||
kref_put(&wdata->refcount, cifs_uncached_writedata_release);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!is_sync_kiocb(iocb)) {
|
||||||
|
kref_put(&ctx->refcount, cifs_aio_ctx_release);
|
||||||
|
return -EIOCBQUEUED;
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = wait_for_completion_killable(&ctx->done);
|
||||||
|
if (rc) {
|
||||||
|
mutex_lock(&ctx->aio_mutex);
|
||||||
|
ctx->rc = rc = -EINTR;
|
||||||
|
total_written = ctx->total_len;
|
||||||
|
mutex_unlock(&ctx->aio_mutex);
|
||||||
|
} else {
|
||||||
|
rc = ctx->rc;
|
||||||
|
total_written = ctx->total_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
kref_put(&ctx->refcount, cifs_aio_ctx_release);
|
||||||
|
|
||||||
if (unlikely(!total_written))
|
if (unlikely(!total_written))
|
||||||
return rc;
|
return rc;
|
||||||
|
|
||||||
iocb->ki_pos += total_written;
|
iocb->ki_pos += total_written;
|
||||||
set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
|
|
||||||
cifs_stats_bytes_written(tcon, total_written);
|
|
||||||
return total_written;
|
return total_written;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue