Merge branch 'work.splice_read' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull VFS splice updates from Al Viro:
 "There's a bunch of branches this cycle, both mine and from other
  folks, and I'd rather send pull requests separately.

  This one is the conversion of ->splice_read() to ITER_PIPE iov_iter
  (and introduction of such). Gets rid of a lot of code in fs/splice.c
  and elsewhere; there will be followups, but these are for the next
  cycle...

  Some pipe/splice-related cleanups from Miklos in the same branch as
  well"

* 'work.splice_read' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  pipe: fix comment in pipe_buf_operations
  pipe: add pipe_buf_steal() helper
  pipe: add pipe_buf_confirm() helper
  pipe: add pipe_buf_release() helper
  pipe: add pipe_buf_get() helper
  relay: simplify relay_file_read()
  switch default_file_splice_read() to use of pipe-backed iov_iter
  switch generic_file_splice_read() to use of ->read_iter()
  new iov_iter flavour: pipe-backed
  fuse_dev_splice_read(): switch to add_to_pipe()
  skb_splice_bits(): get rid of callback
  new helper: add_to_pipe()
  splice: lift pipe_lock out of splice_to_pipe()
  splice: switch get_iovec_page_array() to iov_iter
  splice_to_pipe(): don't open-code wakeup_pipe_readers()
  consistent treatment of EFAULT on O_DIRECT read/write
commit d1f5323370

30 changed files with 748 additions and 1079 deletions
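Before the per-file hunks, it may help to see the shape of the change: after this series, splicing from a file no longer walks the page cache by hand. The caller builds an ITER_PIPE iov_iter over the pipe's buffer ring and hands it to the file's ->read_iter(). The sketch below is condensed from the new generic_file_splice_read() in fs/splice.c further down; the wrapper name is ours, and the WARN_ON/-EFAULT recovery paths are trimmed for brevity.

```c
/*
 * Illustrative sketch of the ITER_PIPE pattern this series introduces,
 * condensed from the new generic_file_splice_read() in this diff.
 * The function name is hypothetical; error/short-read handling omitted.
 */
static ssize_t splice_read_via_read_iter(struct file *in, loff_t *ppos,
					 struct pipe_inode_info *pipe,
					 size_t len)
{
	struct iov_iter to;
	struct kiocb kiocb;
	ssize_t ret;

	/* Describe the pipe's buffer ring as an iov_iter destination. */
	iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len);
	init_sync_kiocb(&kiocb, in);
	kiocb.ki_pos = *ppos;

	/* A filesystem with a sane ->read_iter() fills the pipe directly. */
	ret = in->f_op->read_iter(&kiocb, &to);
	if (ret > 0) {
		*ppos = kiocb.ki_pos;
		file_accessed(in);
	}
	return ret;
}
```

This is why most of the per-filesystem ->splice_read() wrappers below collapse to plain generic_file_splice_read.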
@@ -889,7 +889,7 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 		return 0;

 	/* Try lock this page */
-	if (buf->ops->steal(pipe, buf) == 0) {
+	if (pipe_buf_steal(pipe, buf) == 0) {
 		/* Get reference and unlock page for moving */
 		get_page(buf->page);
 		unlock_page(buf->page);
@@ -1138,45 +1138,31 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
 		range_lock_init(&range, *ppos, *ppos + count - 1);

 		vio->vui_fd = LUSTRE_FPRIVATE(file);
-		vio->vui_io_subtype = args->via_io_subtype;
+		vio->vui_iter = args->u.normal.via_iter;
+		vio->vui_iocb = args->u.normal.via_iocb;
+		/*
+		 * Direct IO reads must also take range lock,
+		 * or multiple reads will try to work on the same pages
+		 * See LU-6227 for details.
+		 */
+		if (((iot == CIT_WRITE) ||
+		     (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
+		    !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
+			CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n",
+			       range.rl_node.in_extent.start,
+			       range.rl_node.in_extent.end);
+			result = range_lock(&lli->lli_write_tree,
+					    &range);
+			if (result < 0)
+				goto out;

-		switch (vio->vui_io_subtype) {
-		case IO_NORMAL:
-			vio->vui_iter = args->u.normal.via_iter;
-			vio->vui_iocb = args->u.normal.via_iocb;
-			/*
-			 * Direct IO reads must also take range lock,
-			 * or multiple reads will try to work on the same pages
-			 * See LU-6227 for details.
-			 */
-			if (((iot == CIT_WRITE) ||
-			     (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
-			    !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
-				CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n",
-				       range.rl_node.in_extent.start,
-				       range.rl_node.in_extent.end);
-				result = range_lock(&lli->lli_write_tree,
-						    &range);
-				if (result < 0)
-					goto out;
-
-				range_locked = true;
-			}
-			down_read(&lli->lli_trunc_sem);
-			break;
-		case IO_SPLICE:
-			vio->u.splice.vui_pipe = args->u.splice.via_pipe;
-			vio->u.splice.vui_flags = args->u.splice.via_flags;
-			break;
-		default:
-			CERROR("Unknown IO type - %u\n", vio->vui_io_subtype);
-			LBUG();
+			range_locked = true;
 		}
+		down_read(&lli->lli_trunc_sem);
 		ll_cl_add(file, env, io);
 		result = cl_io_loop(env, io);
 		ll_cl_remove(file, env);
-		if (args->via_io_subtype == IO_NORMAL)
-			up_read(&lli->lli_trunc_sem);
+		up_read(&lli->lli_trunc_sem);
 		if (range_locked) {
 			CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n",
 			       range.rl_node.in_extent.start,
@@ -1235,7 +1221,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	if (IS_ERR(env))
 		return PTR_ERR(env);

-	args = ll_env_args(env, IO_NORMAL);
+	args = ll_env_args(env);
 	args->u.normal.via_iter = to;
 	args->u.normal.via_iocb = iocb;

@@ -1259,7 +1245,7 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (IS_ERR(env))
 		return PTR_ERR(env);

-	args = ll_env_args(env, IO_NORMAL);
+	args = ll_env_args(env);
 	args->u.normal.via_iter = from;
 	args->u.normal.via_iocb = iocb;
@@ -1269,31 +1255,6 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	return result;
 }

-/*
- * Send file content (through pagecache) somewhere with helper
- */
-static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
-				   struct pipe_inode_info *pipe, size_t count,
-				   unsigned int flags)
-{
-	struct lu_env *env;
-	struct vvp_io_args *args;
-	ssize_t result;
-	int refcheck;
-
-	env = cl_env_get(&refcheck);
-	if (IS_ERR(env))
-		return PTR_ERR(env);
-
-	args = ll_env_args(env, IO_SPLICE);
-	args->u.splice.via_pipe = pipe;
-	args->u.splice.via_flags = flags;
-
-	result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
-	cl_env_put(env, &refcheck);
-	return result;
-}
-
 int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
 			     __u64 flags, struct lov_user_md *lum,
 			     int lum_size)
@@ -3267,7 +3228,7 @@ struct file_operations ll_file_operations = {
 	.release = ll_file_release,
 	.mmap = ll_file_mmap,
 	.llseek = ll_file_seek,
-	.splice_read = ll_file_splice_read,
+	.splice_read = generic_file_splice_read,
 	.fsync = ll_fsync,
 	.flush = ll_flush
 };

@@ -3280,7 +3241,7 @@ struct file_operations ll_file_operations_flock = {
 	.release = ll_file_release,
 	.mmap = ll_file_mmap,
 	.llseek = ll_file_seek,
-	.splice_read = ll_file_splice_read,
+	.splice_read = generic_file_splice_read,
 	.fsync = ll_fsync,
 	.flush = ll_flush,
 	.flock = ll_file_flock,

@@ -3296,7 +3257,7 @@ struct file_operations ll_file_operations_noflock = {
 	.release = ll_file_release,
 	.mmap = ll_file_mmap,
 	.llseek = ll_file_seek,
-	.splice_read = ll_file_splice_read,
+	.splice_read = generic_file_splice_read,
 	.fsync = ll_fsync,
 	.flush = ll_flush,
 	.flock = ll_file_noflock,
@@ -908,17 +908,11 @@ void vvp_write_complete(struct vvp_object *club, struct vvp_page *page);
 */
struct vvp_io_args {
-	/** normal/splice */
-	enum vvp_io_subtype via_io_subtype;
-
	union {
		struct {
			struct kiocb *via_iocb;
			struct iov_iter *via_iter;
		} normal;
-		struct {
-			struct pipe_inode_info *via_pipe;
-			unsigned int via_flags;
-		} splice;
	} u;
};

@@ -946,14 +940,9 @@ static inline struct ll_thread_info *ll_env_info(const struct lu_env *env)
	return lti;
}

-static inline struct vvp_io_args *ll_env_args(const struct lu_env *env,
-					      enum vvp_io_subtype type)
+static inline struct vvp_io_args *ll_env_args(const struct lu_env *env)
{
-	struct vvp_io_args *via = &ll_env_info(env)->lti_args;
-
-	via->via_io_subtype = type;
-
-	return via;
+	return &ll_env_info(env)->lti_args;
}

void ll_queue_done_writing(struct inode *inode, unsigned long flags);
@@ -49,14 +49,6 @@ struct obd_device;
struct obd_export;
struct page;

-/* specific architecture can implement only part of this list */
-enum vvp_io_subtype {
-	/** normal IO */
-	IO_NORMAL,
-	/** io started from splice_{read|write} */
-	IO_SPLICE
-};
-
/**
 * IO state private to VVP layer.
 */

@@ -98,10 +90,6 @@ struct vvp_io {
			 */
			bool ft_flags_valid;
		} fault;
-		struct {
-			struct pipe_inode_info *vui_pipe;
-			unsigned int vui_flags;
-		} splice;
		struct {
			struct cl_page_list vui_queue;
			unsigned long vui_written;

@@ -110,8 +98,6 @@ struct vvp_io {
		} write;
	} u;

-	enum vvp_io_subtype vui_io_subtype;
-
	/**
	 * Layout version when this IO is initialized
	 */
@@ -53,18 +53,6 @@ static struct vvp_io *cl2vvp_io(const struct lu_env *env,
	return vio;
}

-/**
- * True, if \a io is a normal io, False for splice_{read,write}
- */
-static int cl_is_normalio(const struct lu_env *env, const struct cl_io *io)
-{
-	struct vvp_io *vio = vvp_env_io(env);
-
-	LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
-
-	return vio->vui_io_subtype == IO_NORMAL;
-}
-
/**
 * For swapping layout. The file's layout may have changed.
 * To avoid populating pages to a wrong stripe, we have to verify the

@@ -390,9 +378,6 @@ static int vvp_mmap_locks(const struct lu_env *env,

	LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);

-	if (!cl_is_normalio(env, io))
-		return 0;
-
	if (!vio->vui_iter) /* nfs or loop back device write */
		return 0;

@@ -461,15 +446,10 @@ static void vvp_io_advance(const struct lu_env *env,
			   const struct cl_io_slice *ios,
			   size_t nob)
{
-	struct vvp_io *vio = cl2vvp_io(env, ios);
-	struct cl_io *io = ios->cis_io;
	struct cl_object *obj = ios->cis_io->ci_obj;
+	struct vvp_io *vio = cl2vvp_io(env, ios);

	CLOBINVRNT(env, obj, vvp_object_invariant(obj));

-	if (!cl_is_normalio(env, io))
-		return;
-
	iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count -= nob);
}

@@ -478,7 +458,7 @@ static void vvp_io_update_iov(const struct lu_env *env,
{
	size_t size = io->u.ci_rw.crw_count;

-	if (!cl_is_normalio(env, io) || !vio->vui_iter)
+	if (!vio->vui_iter)
		return;

	iov_iter_truncate(vio->vui_iter, size);
@@ -715,25 +695,8 @@ static int vvp_io_read_start(const struct lu_env *env,

	/* BUG: 5972 */
	file_accessed(file);
-	switch (vio->vui_io_subtype) {
-	case IO_NORMAL:
-		LASSERT(vio->vui_iocb->ki_pos == pos);
-		result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter);
-		break;
-	case IO_SPLICE:
-		result = generic_file_splice_read(file, &pos,
-						  vio->u.splice.vui_pipe, cnt,
-						  vio->u.splice.vui_flags);
-		/* LU-1109: do splice read stripe by stripe otherwise if it
-		 * may make nfsd stuck if this read occupied all internal pipe
-		 * buffers.
-		 */
-		io->ci_continue = 0;
-		break;
-	default:
-		CERROR("Wrong IO type %u\n", vio->vui_io_subtype);
-		LBUG();
-	}
+	LASSERT(vio->vui_iocb->ki_pos == pos);
+	result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter);

out:
	if (result >= 0) {
@@ -37,27 +37,6 @@ coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
	return vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos);
}

-static ssize_t
-coda_file_splice_read(struct file *coda_file, loff_t *ppos,
-		      struct pipe_inode_info *pipe, size_t count,
-		      unsigned int flags)
-{
-	ssize_t (*splice_read)(struct file *, loff_t *,
-			       struct pipe_inode_info *, size_t, unsigned int);
-	struct coda_file_info *cfi;
-	struct file *host_file;
-
-	cfi = CODA_FTOC(coda_file);
-	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
-	host_file = cfi->cfi_container;
-
-	splice_read = host_file->f_op->splice_read;
-	if (!splice_read)
-		splice_read = default_file_splice_read;
-
-	return splice_read(host_file, ppos, pipe, count, flags);
-}
-
static ssize_t
coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
{

@@ -225,6 +204,6 @@ const struct file_operations coda_file_operations = {
	.open = coda_open,
	.release = coda_release,
	.fsync = coda_fsync,
-	.splice_read = coda_file_splice_read,
+	.splice_read = generic_file_splice_read,
};
@@ -246,6 +246,9 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
		if ((dio->op == REQ_OP_READ) &&
		    ((offset + transferred) > dio->i_size))
			transferred = dio->i_size - offset;
+		/* ignore EFAULT if some IO has been done */
+		if (unlikely(ret == -EFAULT) && transferred)
+			ret = 0;
	}

	if (ret == 0)
@@ -728,7 +728,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
		struct pipe_buffer *buf = cs->pipebufs;

		if (!cs->write) {
-			err = buf->ops->confirm(cs->pipe, buf);
+			err = pipe_buf_confirm(cs->pipe, buf);
			if (err)
				return err;

@@ -827,7 +827,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)

	fuse_copy_finish(cs);

-	err = buf->ops->confirm(cs->pipe, buf);
+	err = pipe_buf_confirm(cs->pipe, buf);
	if (err)
		return err;

@@ -840,7 +840,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
	if (cs->len != PAGE_SIZE)
		goto out_fallback;

-	if (buf->ops->steal(cs->pipe, buf) != 0)
+	if (pipe_buf_steal(cs->pipe, buf) != 0)
		goto out_fallback;

	newpage = buf->page;
@@ -1341,9 +1341,8 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
				    struct pipe_inode_info *pipe,
				    size_t len, unsigned int flags)
{
-	int ret;
+	int total, ret;
	int page_nr = 0;
-	int do_wakeup = 0;
	struct pipe_buffer *bufs;
	struct fuse_copy_state cs;
	struct fuse_dev *fud = fuse_get_dev(in);

@@ -1362,52 +1361,23 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
	if (ret < 0)
		goto out;

-	ret = 0;
-	pipe_lock(pipe);
-
-	if (!pipe->readers) {
-		send_sig(SIGPIPE, current, 0);
-		if (!ret)
-			ret = -EPIPE;
-		goto out_unlock;
-	}
-
	if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
		ret = -EIO;
-		goto out_unlock;
+		goto out;
	}

-	while (page_nr < cs.nr_segs) {
-		int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
-		struct pipe_buffer *buf = pipe->bufs + newbuf;
-
-		buf->page = bufs[page_nr].page;
-		buf->offset = bufs[page_nr].offset;
-		buf->len = bufs[page_nr].len;
+	for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {
		/*
		 * Need to be careful about this. Having buf->ops in module
		 * code can Oops if the buffer persists after module unload.
		 */
-		buf->ops = &nosteal_pipe_buf_ops;
-
-		pipe->nrbufs++;
-		page_nr++;
-		ret += buf->len;
-
-		if (pipe->files)
-			do_wakeup = 1;
+		bufs[page_nr].ops = &nosteal_pipe_buf_ops;
+		ret = add_to_pipe(pipe, &bufs[page_nr++]);
+		if (unlikely(ret < 0))
+			break;
	}

-out_unlock:
-	pipe_unlock(pipe);
-
-	if (do_wakeup) {
-		smp_mb();
-		if (waitqueue_active(&pipe->wait))
-			wake_up_interruptible(&pipe->wait);
-		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
-	}
-
+	if (total)
+		ret = total;
out:
	for (; page_nr < cs.nr_segs; page_nr++)
		put_page(bufs[page_nr].page);
@@ -1992,7 +1962,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
			pipe->nrbufs--;
		} else {
-			ibuf->ops->get(pipe, ibuf);
+			pipe_buf_get(pipe, ibuf);
			*obuf = *ibuf;
			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
			obuf->len = rem;

@@ -2014,10 +1984,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,

	ret = fuse_dev_do_write(fud, &cs, len);

-	for (idx = 0; idx < nbuf; idx++) {
-		struct pipe_buffer *buf = &bufs[idx];
-		buf->ops->release(pipe, buf);
-	}
+	for (idx = 0; idx < nbuf; idx++)
+		pipe_buf_release(pipe, &bufs[idx]);

out:
	kfree(bufs);
	return ret;
@@ -954,30 +954,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le
	return ret;
}

-static ssize_t gfs2_file_splice_read(struct file *in, loff_t *ppos,
-				     struct pipe_inode_info *pipe, size_t len,
-				     unsigned int flags)
-{
-	struct inode *inode = in->f_mapping->host;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_holder gh;
-	int ret;
-
-	inode_lock(inode);
-
-	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
-	if (ret) {
-		inode_unlock(inode);
-		return ret;
-	}
-
-	gfs2_glock_dq_uninit(&gh);
-	inode_unlock(inode);
-
-	return generic_file_splice_read(in, ppos, pipe, len, flags);
-}
-
-
static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
				      struct file *out, loff_t *ppos,
				      size_t len, unsigned int flags)

@@ -1140,7 +1116,7 @@ const struct file_operations gfs2_file_fops = {
	.fsync = gfs2_fsync,
	.lock = gfs2_lock,
	.flock = gfs2_flock,
-	.splice_read = gfs2_file_splice_read,
+	.splice_read = generic_file_splice_read,
	.splice_write = gfs2_file_splice_write,
	.setlease = simple_nosetlease,
	.fallocate = gfs2_fallocate,

@@ -1168,7 +1144,7 @@ const struct file_operations gfs2_file_fops_nolock = {
	.open = gfs2_open,
	.release = gfs2_release,
	.fsync = gfs2_fsync,
-	.splice_read = gfs2_file_splice_read,
+	.splice_read = generic_file_splice_read,
	.splice_write = gfs2_file_splice_write,
	.setlease = generic_setlease,
	.fallocate = gfs2_fallocate,
@@ -182,29 +182,6 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
}
EXPORT_SYMBOL_GPL(nfs_file_read);

-ssize_t
-nfs_file_splice_read(struct file *filp, loff_t *ppos,
-		     struct pipe_inode_info *pipe, size_t count,
-		     unsigned int flags)
-{
-	struct inode *inode = file_inode(filp);
-	ssize_t res;
-
-	dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
-		filp, (unsigned long) count, (unsigned long long) *ppos);
-
-	nfs_start_io_read(inode);
-	res = nfs_revalidate_mapping(inode, filp->f_mapping);
-	if (!res) {
-		res = generic_file_splice_read(filp, ppos, pipe, count, flags);
-		if (res > 0)
-			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res);
-	}
-	nfs_end_io_read(inode);
-	return res;
-}
-EXPORT_SYMBOL_GPL(nfs_file_splice_read);
-
int
nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
{

@@ -871,7 +848,7 @@ const struct file_operations nfs_file_operations = {
	.fsync = nfs_file_fsync,
	.lock = nfs_lock,
	.flock = nfs_flock,
-	.splice_read = nfs_file_splice_read,
+	.splice_read = generic_file_splice_read,
	.splice_write = iter_file_splice_write,
	.check_flags = nfs_check_flags,
	.setlease = simple_nosetlease,

@@ -365,8 +365,6 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *)
int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
loff_t nfs_file_llseek(struct file *, loff_t, int);
ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
-ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
-			     size_t, unsigned int);
int nfs_file_mmap(struct file *, struct vm_area_struct *);
ssize_t nfs_file_write(struct kiocb *, struct iov_iter *);
int nfs_file_release(struct inode *, struct file *);

@@ -248,7 +248,7 @@ const struct file_operations nfs4_file_operations = {
	.fsync = nfs_file_fsync,
	.lock = nfs_lock,
	.flock = nfs_flock,
-	.splice_read = nfs_file_splice_read,
+	.splice_read = generic_file_splice_read,
	.splice_write = iter_file_splice_write,
	.check_flags = nfs_check_flags,
	.setlease = simple_nosetlease,
@@ -2321,36 +2321,6 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
	return ret;
}

-static ssize_t ocfs2_file_splice_read(struct file *in,
-				      loff_t *ppos,
-				      struct pipe_inode_info *pipe,
-				      size_t len,
-				      unsigned int flags)
-{
-	int ret = 0, lock_level = 0;
-	struct inode *inode = file_inode(in);
-
-	trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry,
-				     (unsigned long long)OCFS2_I(inode)->ip_blkno,
-				     in->f_path.dentry->d_name.len,
-				     in->f_path.dentry->d_name.name, len);
-
-	/*
-	 * See the comment in ocfs2_file_read_iter()
-	 */
-	ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level);
-	if (ret < 0) {
-		mlog_errno(ret);
-		goto bail;
-	}
-	ocfs2_inode_unlock(inode, lock_level);
-
-	ret = generic_file_splice_read(in, ppos, pipe, len, flags);
-
-bail:
-	return ret;
-}
-
static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
				    struct iov_iter *to)
{

@@ -2509,7 +2479,7 @@ const struct file_operations ocfs2_fops = {
#endif
	.lock = ocfs2_lock,
	.flock = ocfs2_flock,
-	.splice_read = ocfs2_file_splice_read,
+	.splice_read = generic_file_splice_read,
	.splice_write = iter_file_splice_write,
	.fallocate = ocfs2_fallocate,
};

@@ -2554,7 +2524,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
	.compat_ioctl = ocfs2_compat_ioctl,
#endif
	.flock = ocfs2_flock,
-	.splice_read = ocfs2_file_splice_read,
+	.splice_read = generic_file_splice_read,
	.splice_write = iter_file_splice_write,
	.fallocate = ocfs2_fallocate,
};

@@ -1314,8 +1314,6 @@ DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_write);

DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write);

-DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_read);
-
DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_read);

DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file);
13 fs/pipe.c
@@ -267,7 +267,6 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
		if (bufs) {
			int curbuf = pipe->curbuf;
			struct pipe_buffer *buf = pipe->bufs + curbuf;
-			const struct pipe_buf_operations *ops = buf->ops;
			size_t chars = buf->len;
			size_t written;
			int error;

@@ -275,7 +274,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
			if (chars > total_len)
				chars = total_len;

-			error = ops->confirm(pipe, buf);
+			error = pipe_buf_confirm(pipe, buf);
			if (error) {
				if (!ret)
					ret = error;

@@ -299,8 +298,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
			}

			if (!buf->len) {
-				buf->ops = NULL;
-				ops->release(pipe, buf);
+				pipe_buf_release(pipe, buf);
				curbuf = (curbuf + 1) & (pipe->buffers - 1);
				pipe->curbuf = curbuf;
				pipe->nrbufs = --bufs;

@@ -383,11 +381,10 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
		int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
							(pipe->buffers - 1);
		struct pipe_buffer *buf = pipe->bufs + lastbuf;
-		const struct pipe_buf_operations *ops = buf->ops;
		int offset = buf->offset + buf->len;

-		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
-			ret = ops->confirm(pipe, buf);
+		if (buf->ops->can_merge && offset + chars <= PAGE_SIZE) {
+			ret = pipe_buf_confirm(pipe, buf);
			if (ret)
				goto out;

@@ -664,7 +661,7 @@ void free_pipe_info(struct pipe_inode_info *pipe)
	for (i = 0; i < pipe->buffers; i++) {
		struct pipe_buffer *buf = pipe->bufs + i;
		if (buf->ops)
-			buf->ops->release(pipe, buf);
+			pipe_buf_release(pipe, buf);
	}
	if (pipe->tmp_page)
		__free_page(pipe->tmp_page);
687 fs/splice.c
@@ -183,82 +183,39 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
		       struct splice_pipe_desc *spd)
{
	unsigned int spd_pages = spd->nr_pages;
-	int ret, do_wakeup, page_nr;
+	int ret = 0, page_nr = 0;

-	ret = 0;
-	do_wakeup = 0;
-	page_nr = 0;
-
-	pipe_lock(pipe);
-
-	for (;;) {
-		if (!pipe->readers) {
-			send_sig(SIGPIPE, current, 0);
-			if (!ret)
-				ret = -EPIPE;
-			break;
-		}
-
-		if (pipe->nrbufs < pipe->buffers) {
-			int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
-			struct pipe_buffer *buf = pipe->bufs + newbuf;
-
-			buf->page = spd->pages[page_nr];
-			buf->offset = spd->partial[page_nr].offset;
-			buf->len = spd->partial[page_nr].len;
-			buf->private = spd->partial[page_nr].private;
-			buf->ops = spd->ops;
-			if (spd->flags & SPLICE_F_GIFT)
-				buf->flags |= PIPE_BUF_FLAG_GIFT;
-
-			pipe->nrbufs++;
-			page_nr++;
-			ret += buf->len;
-
-			if (pipe->files)
-				do_wakeup = 1;
-
-			if (!--spd->nr_pages)
-				break;
-			if (pipe->nrbufs < pipe->buffers)
-				continue;
-
-			break;
-		}
-
-		if (spd->flags & SPLICE_F_NONBLOCK) {
-			if (!ret)
-				ret = -EAGAIN;
-			break;
-		}
-
-		if (signal_pending(current)) {
-			if (!ret)
-				ret = -ERESTARTSYS;
-			break;
-		}
-
-		if (do_wakeup) {
-			smp_mb();
-			if (waitqueue_active(&pipe->wait))
-				wake_up_interruptible_sync(&pipe->wait);
-			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
-			do_wakeup = 0;
-		}
-
-		pipe->waiting_writers++;
-		pipe_wait(pipe);
-		pipe->waiting_writers--;
+	if (!spd_pages)
+		return 0;
+
+	if (unlikely(!pipe->readers)) {
+		send_sig(SIGPIPE, current, 0);
+		ret = -EPIPE;
+		goto out;
	}

-	pipe_unlock(pipe);
-
-	if (do_wakeup)
-		wakeup_pipe_readers(pipe);
+	while (pipe->nrbufs < pipe->buffers) {
+		int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+		struct pipe_buffer *buf = pipe->bufs + newbuf;
+
+		buf->page = spd->pages[page_nr];
+		buf->offset = spd->partial[page_nr].offset;
+		buf->len = spd->partial[page_nr].len;
+		buf->private = spd->partial[page_nr].private;
+		buf->ops = spd->ops;
+
+		pipe->nrbufs++;
+		page_nr++;
+		ret += buf->len;
+
+		if (!--spd->nr_pages)
+			break;
+	}
+
+	if (!ret)
+		ret = -EAGAIN;
+
+out:
	while (page_nr < spd_pages)
		spd->spd_release(spd, page_nr++);
@@ -266,6 +223,26 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
}
EXPORT_SYMBOL_GPL(splice_to_pipe);

+ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
+{
+	int ret;
+
+	if (unlikely(!pipe->readers)) {
+		send_sig(SIGPIPE, current, 0);
+		ret = -EPIPE;
+	} else if (pipe->nrbufs == pipe->buffers) {
+		ret = -EAGAIN;
+	} else {
+		int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+		pipe->bufs[newbuf] = *buf;
+		pipe->nrbufs++;
+		return buf->len;
+	}
+	pipe_buf_release(pipe, buf);
+	return ret;
+}
+EXPORT_SYMBOL(add_to_pipe);
+
void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
{
	put_page(spd->pages[i]);
@@ -303,207 +280,6 @@ void splice_shrink_spd(struct splice_pipe_desc *spd)
	kfree(spd->partial);
}

-static int
-__generic_file_splice_read(struct file *in, loff_t *ppos,
-			   struct pipe_inode_info *pipe, size_t len,
-			   unsigned int flags)
-{
-	struct address_space *mapping = in->f_mapping;
-	unsigned int loff, nr_pages, req_pages;
-	struct page *pages[PIPE_DEF_BUFFERS];
-	struct partial_page partial[PIPE_DEF_BUFFERS];
-	struct page *page;
-	pgoff_t index, end_index;
-	loff_t isize;
-	int error, page_nr;
-	struct splice_pipe_desc spd = {
-		.pages = pages,
-		.partial = partial,
-		.nr_pages_max = PIPE_DEF_BUFFERS,
-		.flags = flags,
-		.ops = &page_cache_pipe_buf_ops,
-		.spd_release = spd_release_page,
-	};
-
-	if (splice_grow_spd(pipe, &spd))
-		return -ENOMEM;
-
-	index = *ppos >> PAGE_SHIFT;
-	loff = *ppos & ~PAGE_MASK;
-	req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	nr_pages = min(req_pages, spd.nr_pages_max);
-
-	/*
-	 * Lookup the (hopefully) full range of pages we need.
-	 */
-	spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages);
-	index += spd.nr_pages;
-
-	/*
-	 * If find_get_pages_contig() returned fewer pages than we needed,
-	 * readahead/allocate the rest and fill in the holes.
-	 */
-	if (spd.nr_pages < nr_pages)
-		page_cache_sync_readahead(mapping, &in->f_ra, in,
-					  index, req_pages - spd.nr_pages);
-
-	error = 0;
-	while (spd.nr_pages < nr_pages) {
-		/*
-		 * Page could be there, find_get_pages_contig() breaks on
-		 * the first hole.
-		 */
-		page = find_get_page(mapping, index);
-		if (!page) {
-			/*
-			 * page didn't exist, allocate one.
-			 */
-			page = page_cache_alloc_cold(mapping);
-			if (!page)
-				break;
-
-			error = add_to_page_cache_lru(page, mapping, index,
-					mapping_gfp_constraint(mapping, GFP_KERNEL));
-			if (unlikely(error)) {
-				put_page(page);
-				if (error == -EEXIST)
-					continue;
-				break;
-			}
-			/*
-			 * add_to_page_cache() locks the page, unlock it
-			 * to avoid convoluting the logic below even more.
-			 */
-			unlock_page(page);
-		}
-
-		spd.pages[spd.nr_pages++] = page;
-		index++;
-	}
-
-	/*
-	 * Now loop over the map and see if we need to start IO on any
-	 * pages, fill in the partial map, etc.
-	 */
-	index = *ppos >> PAGE_SHIFT;
-	nr_pages = spd.nr_pages;
-	spd.nr_pages = 0;
-	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
-		unsigned int this_len;
-
-		if (!len)
-			break;
-
-		/*
-		 * this_len is the max we'll use from this page
-		 */
-		this_len = min_t(unsigned long, len, PAGE_SIZE - loff);
-		page = spd.pages[page_nr];
-
-		if (PageReadahead(page))
-			page_cache_async_readahead(mapping, &in->f_ra, in,
-					page, index, req_pages - page_nr);
-
-		/*
-		 * If the page isn't uptodate, we may need to start io on it
-		 */
-		if (!PageUptodate(page)) {
-			lock_page(page);
-
-			/*
-			 * Page was truncated, or invalidated by the
-			 * filesystem. Redo the find/create, but this time the
-			 * page is kept locked, so there's no chance of another
-			 * race with truncate/invalidate.
-			 */
-			if (!page->mapping) {
-				unlock_page(page);
-retry_lookup:
-				page = find_or_create_page(mapping, index,
-						mapping_gfp_mask(mapping));
-
-				if (!page) {
-					error = -ENOMEM;
-					break;
-				}
-				put_page(spd.pages[page_nr]);
-				spd.pages[page_nr] = page;
-			}
-			/*
-			 * page was already under io and is now done, great
-			 */
-			if (PageUptodate(page)) {
-				unlock_page(page);
-				goto fill_it;
-			}
-
-			/*
-			 * need to read in the page
-			 */
-			error = mapping->a_ops->readpage(in, page);
-			if (unlikely(error)) {
-				/*
-				 * Re-lookup the page
-				 */
-				if (error == AOP_TRUNCATED_PAGE)
-					goto retry_lookup;
-
-				break;
-			}
-		}
-fill_it:
-		/*
-		 * i_size must be checked after PageUptodate.
-		 */
-		isize = i_size_read(mapping->host);
-		end_index = (isize - 1) >> PAGE_SHIFT;
-		if (unlikely(!isize || index > end_index))
-			break;
-
-		/*
-		 * if this is the last page, see if we need to shrink
-		 * the length and stop
-		 */
-		if (end_index == index) {
-			unsigned int plen;
-
-			/*
-			 * max good bytes in this page
-			 */
-			plen = ((isize - 1) & ~PAGE_MASK) + 1;
-			if (plen <= loff)
-				break;
-
-			/*
-			 * force quit after adding this page
-			 */
-			this_len = min(this_len, plen - loff);
-			len = this_len;
-		}
-
-		spd.partial[page_nr].offset = loff;
-		spd.partial[page_nr].len = this_len;
-		len -= this_len;
-		loff = 0;
-		spd.nr_pages++;
-		index++;
-	}
-
-	/*
-	 * Release any pages at the end, if we quit early. 'page_nr' is how far
-	 * we got, 'nr_pages' is how many pages are in the map.
-	 */
-	while (page_nr < nr_pages)
-		put_page(spd.pages[page_nr++]);
-	in->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
-
-	if (spd.nr_pages)
-		error = splice_to_pipe(pipe, &spd);
-
-	splice_shrink_spd(&spd);
-	return error;
-}
-
/**
 * generic_file_splice_read - splice data from file to a pipe
 * @in: file to splice from
@@ -514,39 +290,53 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 *
 * Description:
 *    Will read pages from given file and fill them into a pipe. Can be
-*    used as long as the address_space operations for the source implements
-*    a readpage() hook.
+*    used as long as it has more or less sane ->read_iter().
 *
 */
ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
				 struct pipe_inode_info *pipe, size_t len,
				 unsigned int flags)
{
-	loff_t isize, left;
-	int ret;
-
-	if (IS_DAX(in->f_mapping->host))
-		return default_file_splice_read(in, ppos, pipe, len, flags);
+	struct iov_iter to;
+	struct kiocb kiocb;
+	loff_t isize;
+	int idx, ret;

	isize = i_size_read(in->f_mapping->host);
	if (unlikely(*ppos >= isize))
		return 0;

-	left = isize - *ppos;
-	if (unlikely(left < len))
-		len = left;
-
-	ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
+	iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len);
+	idx = to.idx;
+	init_sync_kiocb(&kiocb, in);
+	kiocb.ki_pos = *ppos;
+	ret = in->f_op->read_iter(&kiocb, &to);
	if (ret > 0) {
-		*ppos += ret;
+		*ppos = kiocb.ki_pos;
		file_accessed(in);
+	} else if (ret < 0) {
+		if (WARN_ON(to.idx != idx || to.iov_offset)) {
+			/*
+			 * a bogus ->read_iter() has copied something and still
+			 * returned an error instead of a short read.
+			 */
+			to.idx = idx;
+			to.iov_offset = 0;
+			iov_iter_advance(&to, 0); /* to free what was emitted */
+		}
+		/*
+		 * callers of ->splice_read() expect -EAGAIN on
+		 * "can't put anything in there", rather than -EFAULT.
+		 */
+		if (ret == -EFAULT)
+			ret = -EAGAIN;
	}

	return ret;
}
EXPORT_SYMBOL(generic_file_splice_read);

-static const struct pipe_buf_operations default_pipe_buf_ops = {
+const struct pipe_buf_operations default_pipe_buf_ops = {
	.can_merge = 0,
	.confirm = generic_pipe_buf_confirm,
	.release = generic_pipe_buf_release,
@@ -570,7 +360,7 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = {
};
EXPORT_SYMBOL(nosteal_pipe_buf_ops);

-static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
+static ssize_t kernel_readv(struct file *file, const struct kvec *vec,
			    unsigned long vlen, loff_t offset)
{
	mm_segment_t old_fs;
@@ -602,102 +392,70 @@ ssize_t kernel_write(struct file *file, const char *buf, size_t count,
}
EXPORT_SYMBOL(kernel_write);

-ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
+static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
				 struct pipe_inode_info *pipe, size_t len,
				 unsigned int flags)
{
+	struct kvec *vec, __vec[PIPE_DEF_BUFFERS];
+	struct iov_iter to;
+	struct page **pages;
	unsigned int nr_pages;
-	unsigned int nr_freed;
-	size_t offset;
-	struct page *pages[PIPE_DEF_BUFFERS];
-	struct partial_page partial[PIPE_DEF_BUFFERS];
-	struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
+	size_t offset, dummy, copied = 0;
	ssize_t res;
-	size_t this_len;
-	int error;
	int i;
-	struct splice_pipe_desc spd = {
-		.pages = pages,
-		.partial = partial,
-		.nr_pages_max = PIPE_DEF_BUFFERS,
-		.flags = flags,
-		.ops = &default_pipe_buf_ops,
-		.spd_release = spd_release_page,
-	};

-	if (splice_grow_spd(pipe, &spd))
-		return -ENOMEM;
+	if (pipe->nrbufs == pipe->buffers)
+		return -EAGAIN;
+
+	/*
+	 * Try to keep page boundaries matching to source pagecache ones -
+	 * it probably won't be much help, but...
+	 */
+	offset = *ppos & ~PAGE_MASK;
+
+	iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset);
+
+	res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &dummy);
+	if (res <= 0)
+		return -ENOMEM;
+
+	nr_pages = res / PAGE_SIZE;

	vec = __vec;
-	if (spd.nr_pages_max > PIPE_DEF_BUFFERS) {
-		vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL);
-		if (!vec)
-			goto shrink_ret;
+	if (nr_pages > PIPE_DEF_BUFFERS) {
+		vec = kmalloc(nr_pages * sizeof(struct kvec), GFP_KERNEL);
+		if (unlikely(!vec)) {
+			res = -ENOMEM;
+			goto out;
+		}
	}

-	offset = *ppos & ~PAGE_MASK;
-	nr_pages = (len + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	pipe->bufs[to.idx].offset = offset;
+	pipe->bufs[to.idx].len -= offset;

-	for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) {
-		struct page *page;
-
-		page = alloc_page(GFP_USER);
-		error = -ENOMEM;
-		if (!page)
-			goto err;
-
-		this_len = min_t(size_t, len, PAGE_SIZE - offset);
-		vec[i].iov_base = (void __user *) page_address(page);
+	for (i = 0; i < nr_pages; i++) {
+		size_t this_len = min_t(size_t, len, PAGE_SIZE - offset);
+		vec[i].iov_base = page_address(pages[i]) + offset;
		vec[i].iov_len = this_len;
-		spd.pages[i] = page;
-		spd.nr_pages++;
		len -= this_len;
		offset = 0;
	}

-	res = kernel_readv(in, vec, spd.nr_pages, *ppos);
-	if (res < 0) {
-		error = res;
-		goto err;
-	}
-
-	error = 0;
-	if (!res)
-		goto err;
-
-	nr_freed = 0;
-	for (i = 0; i < spd.nr_pages; i++) {
-		this_len = min_t(size_t, vec[i].iov_len, res);
-		spd.partial[i].offset = 0;
-		spd.partial[i].len = this_len;
-		if (!this_len) {
-			__free_page(spd.pages[i]);
-			spd.pages[i] = NULL;
-			nr_freed++;
-		}
-		res -= this_len;
-	}
-	spd.nr_pages -= nr_freed;
-
-	res = splice_to_pipe(pipe, &spd);
-	if (res > 0)
+	res = kernel_readv(in, vec, nr_pages, *ppos);
+	if (res > 0) {
+		copied = res;
		*ppos += res;
+	}

-shrink_ret:
	if (vec != __vec)
		kfree(vec);
-	splice_shrink_spd(&spd);
+out:
+	for (i = 0; i < nr_pages; i++)
+		put_page(pages[i]);
+	kvfree(pages);
+	iov_iter_advance(&to, copied); /* truncates and discards */
	return res;
-
-err:
-	for (i = 0; i < spd.nr_pages; i++)
-		__free_page(spd.pages[i]);
-
-	res = error;
-	goto shrink_ret;
}
-EXPORT_SYMBOL(default_file_splice_read);

/*
 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
@@ -757,13 +515,12 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des

	while (pipe->nrbufs) {
		struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
-		const struct pipe_buf_operations *ops = buf->ops;

		sd->len = buf->len;
		if (sd->len > sd->total_len)
			sd->len = sd->total_len;

-		ret = buf->ops->confirm(pipe, buf);
+		ret = pipe_buf_confirm(pipe, buf);
		if (unlikely(ret)) {
			if (ret == -ENODATA)
				ret = 0;

@@ -783,8 +540,7 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
		sd->total_len -= ret;

		if (!buf->len) {
-			buf->ops = NULL;
-			ops->release(pipe, buf);
+			pipe_buf_release(pipe, buf);
			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
			pipe->nrbufs--;
			if (pipe->files)

@@ -1003,7 +759,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
			if (idx == pipe->buffers - 1)
				idx = -1;

-			ret = buf->ops->confirm(pipe, buf);
+			ret = pipe_buf_confirm(pipe, buf);
			if (unlikely(ret)) {
				if (ret == -ENODATA)
					ret = 0;

@@ -1030,11 +786,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
		while (ret) {
			struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
			if (ret >= buf->len) {
-				const struct pipe_buf_operations *ops = buf->ops;
				ret -= buf->len;
				buf->len = 0;
-				buf->ops = NULL;
-				ops->release(pipe, buf);
+				pipe_buf_release(pipe, buf);
				pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
				pipe->nrbufs--;
				if (pipe->files)
@@ -1273,10 +1027,8 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
	for (i = 0; i < pipe->buffers; i++) {
		struct pipe_buffer *buf = pipe->bufs + i;

-		if (buf->ops) {
-			buf->ops->release(pipe, buf);
-			buf->ops = NULL;
-		}
+		if (buf->ops)
+			pipe_buf_release(pipe, buf);
	}

	if (!bytes)

@@ -1342,6 +1094,20 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
}
EXPORT_SYMBOL(do_splice_direct);

+static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
+{
+	while (pipe->nrbufs == pipe->buffers) {
+		if (flags & SPLICE_F_NONBLOCK)
+			return -EAGAIN;
+		if (signal_pending(current))
+			return -ERESTARTSYS;
+		pipe->waiting_writers++;
+		pipe_wait(pipe);
+		pipe->waiting_writers--;
+	}
+	return 0;
+}
+
static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
			       struct pipe_inode_info *opipe,
			       size_t len, unsigned int flags);
@@ -1424,8 +1190,13 @@ static long do_splice(struct file *in, loff_t __user *off_in,
			offset = in->f_pos;
		}

-		ret = do_splice_to(in, &offset, opipe, len, flags);
-
+		pipe_lock(opipe);
+		ret = wait_for_space(opipe, flags);
+		if (!ret)
+			ret = do_splice_to(in, &offset, opipe, len, flags);
+		pipe_unlock(opipe);
+		if (ret > 0)
+			wakeup_pipe_readers(opipe);
		if (!off_in)
			in->f_pos = offset;
		else if (copy_to_user(off_in, &offset, sizeof(loff_t)))
@@ -1437,106 +1208,50 @@ static long do_splice(struct file *in, loff_t __user *off_in,
	return -EINVAL;
}

-/*
- * Map an iov into an array of pages and offset/length tupples. With the
- * partial_page structure, we can map several non-contiguous ranges into
- * our ones pages[] map instead of splitting that operation into pieces.
- * Could easily be exported as a generic helper for other users, in which
- * case one would probably want to add a 'max_nr_pages' parameter as well.
- */
-static int get_iovec_page_array(const struct iovec __user *iov,
-				unsigned int nr_vecs, struct page **pages,
-				struct partial_page *partial, bool aligned,
-				unsigned int pipe_buffers)
+static int iter_to_pipe(struct iov_iter *from,
+			struct pipe_inode_info *pipe,
+			unsigned flags)
{
-	int buffers = 0, error = 0;
+	struct pipe_buffer buf = {
+		.ops = &user_page_pipe_buf_ops,
+		.flags = flags
+	};
+	size_t total = 0;
+	int ret = 0;
+	bool failed = false;

-	while (nr_vecs) {
-		unsigned long off, npages;
-		struct iovec entry;
-		void __user *base;
-		size_t len;
-		int i;
+	while (iov_iter_count(from) && !failed) {
+		struct page *pages[16];
+		ssize_t copied;
+		size_t start;
+		int n;

-		error = -EFAULT;
-		if (copy_from_user(&entry, iov, sizeof(entry)))
+		copied = iov_iter_get_pages(from, pages, ~0UL, 16, &start);
+		if (copied <= 0) {
+			ret = copied;
			break;
+		}

-		base = entry.iov_base;
-		len = entry.iov_len;
-
-		/*
-		 * Sanity check this iovec. 0 read succeeds.
-		 */
-		error = 0;
-		if (unlikely(!len))
-			break;
-		error = -EFAULT;
-		if (!access_ok(VERIFY_READ, base, len))
-			break;
-
-		/*
-		 * Get this base offset and number of pages, then map
-		 * in the user pages.
-		 */
-		off = (unsigned long) base & ~PAGE_MASK;
-
-		/*
-		 * If asked for alignment, the offset must be zero and the
-		 * length a multiple of the PAGE_SIZE.
-		 */
-		error = -EINVAL;
-		if (aligned && (off || len & ~PAGE_MASK))
-			break;
-
-		npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-		if (npages > pipe_buffers - buffers)
-			npages = pipe_buffers - buffers;
-
-		error = get_user_pages_fast((unsigned long)base, npages,
-					    0, &pages[buffers]);
-
-		if (unlikely(error <= 0))
-			break;
-
-		/*
-		 * Fill this contiguous range into the partial page map.
-		 */
-		for (i = 0; i < error; i++) {
-			const int plen = min_t(size_t, len, PAGE_SIZE - off);
-
-			partial[buffers].offset = off;
-			partial[buffers].len = plen;
-
-			off = 0;
-			len -= plen;
-			buffers++;
-		}
-
-		/*
-		 * We didn't complete this iov, stop here since it probably
-		 * means we have to move some of this into a pipe to
-		 * be able to continue.
-		 */
-		if (len)
-			break;
-
-		/*
-		 * Don't continue if we mapped fewer pages than we asked for,
-		 * or if we mapped the max number of pages that we have
-		 * room for.
-		 */
-		if (error < npages || buffers == pipe_buffers)
-			break;
-
-		nr_vecs--;
-		iov++;
+		for (n = 0; copied; n++, start = 0) {
+			int size = min_t(int, copied, PAGE_SIZE - start);
+			if (!failed) {
+				buf.page = pages[n];
+				buf.offset = start;
+				buf.len = size;
+				ret = add_to_pipe(pipe, &buf);
+				if (unlikely(ret < 0)) {
+					failed = true;
+				} else {
+					iov_iter_advance(from, ret);
+					total += ret;
+				}
+			} else {
+				put_page(pages[n]);
+			}
+			copied -= size;
+		}
	}
-
-	if (buffers)
-		return buffers;
-
-	return error;
+	return total ? total : ret;
}

static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
@@ -1590,38 +1305,36 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
 * as splice-from-memory, where the regular splice is splice-from-file (or
 * to file). In both cases the output is a pipe, naturally.
 */
-static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
+static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov,
			     unsigned long nr_segs, unsigned int flags)
{
	struct pipe_inode_info *pipe;
-	struct page *pages[PIPE_DEF_BUFFERS];
-	struct partial_page partial[PIPE_DEF_BUFFERS];
-	struct splice_pipe_desc spd = {
-		.pages = pages,
-		.partial = partial,
-		.nr_pages_max = PIPE_DEF_BUFFERS,
-		.flags = flags,
-		.ops = &user_page_pipe_buf_ops,
-		.spd_release = spd_release_page,
-	};
+	struct iovec iovstack[UIO_FASTIOV];
+	struct iovec *iov = iovstack;
+	struct iov_iter from;
	long ret;
+	unsigned buf_flag = 0;
+
+	if (flags & SPLICE_F_GIFT)
+		buf_flag = PIPE_BUF_FLAG_GIFT;

	pipe = get_pipe_info(file);
	if (!pipe)
		return -EBADF;

-	if (splice_grow_spd(pipe, &spd))
-		return -ENOMEM;
+	ret = import_iovec(WRITE, uiov, nr_segs,
+			   ARRAY_SIZE(iovstack), &iov, &from);
+	if (ret < 0)
+		return ret;

-	spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages,
-					    spd.partial, false,
-					    spd.nr_pages_max);
-	if (spd.nr_pages <= 0)
-		ret = spd.nr_pages;
-	else
-		ret = splice_to_pipe(pipe, &spd);
-
-	splice_shrink_spd(&spd);
+	pipe_lock(pipe);
+	ret = wait_for_space(pipe, flags);
+	if (!ret)
+		ret = iter_to_pipe(&from, pipe, buf_flag);
+	pipe_unlock(pipe);
+	if (ret > 0)
+		wakeup_pipe_readers(pipe);
+	kfree(iov);
	return ret;
}
@@ -1876,7 +1589,7 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
		 * Get a reference to this pipe buffer,
		 * so we can copy the contents over.
		 */
-		ibuf->ops->get(ipipe, ibuf);
+		pipe_buf_get(ipipe, ibuf);
		*obuf = *ibuf;

		/*

@@ -1948,7 +1661,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
		 * Get a reference to this pipe buffer,
		 * so we can copy the contents over.
		 */
-		ibuf->ops->get(ipipe, ibuf);
+		pipe_buf_get(ipipe, ibuf);

		obuf = opipe->bufs + nbuf;
		*obuf = *ibuf;
@@ -393,45 +393,6 @@ xfs_file_read_iter(
	return ret;
}

-STATIC ssize_t
-xfs_file_splice_read(
-	struct file		*infilp,
-	loff_t			*ppos,
-	struct pipe_inode_info	*pipe,
-	size_t			count,
-	unsigned int		flags)
-{
-	struct xfs_inode	*ip = XFS_I(infilp->f_mapping->host);
-	ssize_t			ret;
-
-	XFS_STATS_INC(ip->i_mount, xs_read_calls);
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-		return -EIO;
-
-	trace_xfs_file_splice_read(ip, count, *ppos);
-
-	/*
-	 * DAX inodes cannot ues the page cache for splice, so we have to push
-	 * them through the VFS IO path. This means it goes through
-	 * ->read_iter, which for us takes the XFS_IOLOCK_SHARED. Hence we
-	 * cannot lock the splice operation at this level for DAX inodes.
-	 */
-	if (IS_DAX(VFS_I(ip))) {
-		ret = default_file_splice_read(infilp, ppos, pipe, count,
-					       flags);
-		goto out;
-	}
-
-	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-	ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
-	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
-out:
-	if (ret > 0)
-		XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
-	return ret;
-}
-
/*
 * Zero any on disk space between the current EOF and the new, larger EOF.
 *
@@ -1608,7 +1569,7 @@ const struct file_operations xfs_file_operations = {
	.llseek = xfs_file_llseek,
	.read_iter = xfs_file_read_iter,
	.write_iter = xfs_file_write_iter,
-	.splice_read = xfs_file_splice_read,
+	.splice_read = generic_file_splice_read,
	.splice_write = iter_file_splice_write,
	.unlocked_ioctl = xfs_file_ioctl,
#ifdef CONFIG_COMPAT

@@ -1170,7 +1170,6 @@ DEFINE_RW_EVENT(xfs_file_dax_read);
DEFINE_RW_EVENT(xfs_file_buffered_write);
DEFINE_RW_EVENT(xfs_file_direct_write);
DEFINE_RW_EVENT(xfs_file_dax_write);
-DEFINE_RW_EVENT(xfs_file_splice_read);

DECLARE_EVENT_CLASS(xfs_page_class,
	TP_PROTO(struct inode *inode, struct page *page, unsigned long off,
@@ -2794,8 +2794,6 @@ extern void block_sync_page(struct page *page);
/* fs/splice.c */
extern ssize_t generic_file_splice_read(struct file *, loff_t *,
		struct pipe_inode_info *, size_t, unsigned int);
-extern ssize_t default_file_splice_read(struct file *, loff_t *,
-		struct pipe_inode_info *, size_t, unsigned int);
extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
		struct file *, loff_t *, size_t, unsigned int);
extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
@@ -66,15 +66,10 @@ struct pipe_inode_info {
 *
 * ->confirm()
 * ->steal()
- * ...
- * ->map()
- * ...
- * ->unmap()
 *
- * That is, ->map() must be called on a confirmed buffer,
- * same goes for ->steal(). See below for the meaning of each
- * operation. Also see kerneldoc in fs/pipe.c for the pipe
- * and generic variants of these hooks.
+ * That is, ->steal() must be called on a confirmed buffer.
+ * See below for the meaning of each operation. Also see kerneldoc
+ * in fs/pipe.c for the pipe and generic variants of these hooks.
 */
struct pipe_buf_operations {
	/*

@@ -115,6 +110,53 @@ struct pipe_buf_operations {
	void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
};

+/**
+ * pipe_buf_get - get a reference to a pipe_buffer
+ * @pipe:	the pipe that the buffer belongs to
+ * @buf:	the buffer to get a reference to
+ */
+static inline void pipe_buf_get(struct pipe_inode_info *pipe,
+				struct pipe_buffer *buf)
+{
+	buf->ops->get(pipe, buf);
+}
+
+/**
+ * pipe_buf_release - put a reference to a pipe_buffer
+ * @pipe:	the pipe that the buffer belongs to
+ * @buf:	the buffer to put a reference to
+ */
+static inline void pipe_buf_release(struct pipe_inode_info *pipe,
+				    struct pipe_buffer *buf)
+{
+	const struct pipe_buf_operations *ops = buf->ops;
+
+	buf->ops = NULL;
+	ops->release(pipe, buf);
+}
+
+/**
+ * pipe_buf_confirm - verify contents of the pipe buffer
+ * @pipe:	the pipe that the buffer belongs to
+ * @buf:	the buffer to confirm
+ */
+static inline int pipe_buf_confirm(struct pipe_inode_info *pipe,
+				   struct pipe_buffer *buf)
+{
+	return buf->ops->confirm(pipe, buf);
+}
+
+/**
+ * pipe_buf_steal - attempt to take ownership of a pipe_buffer
+ * @pipe:	the pipe that the buffer belongs to
+ * @buf:	the buffer to attempt to steal
+ */
+static inline int pipe_buf_steal(struct pipe_inode_info *pipe,
+				 struct pipe_buffer *buf)
+{
+	return buf->ops->steal(pipe, buf);
+}
+
/* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
   memory allocation, whereas PIPE_BUF makes atomicity guarantees. */
#define PIPE_SIZE		PAGE_SIZE

@@ -129,7 +171,6 @@ extern unsigned long pipe_user_pages_hard;
extern unsigned long pipe_user_pages_soft;
int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *);

-
/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe);
@@ -3064,15 +3064,9 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
			      int len, __wsum csum);
-ssize_t skb_socket_splice(struct sock *sk,
-			  struct pipe_inode_info *pipe,
-			  struct splice_pipe_desc *spd);
int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
		    struct pipe_inode_info *pipe, unsigned int len,
-		    unsigned int flags,
-		    ssize_t (*splice_cb)(struct sock *,
-					 struct pipe_inode_info *,
-					 struct splice_pipe_desc *));
+		    unsigned int flags);
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
include/linux/splice.h
@ -72,6 +72,8 @@ extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
 				  struct splice_desc *, splice_actor *);
 extern ssize_t splice_to_pipe(struct pipe_inode_info *,
 			      struct splice_pipe_desc *);
+extern ssize_t add_to_pipe(struct pipe_inode_info *,
+			   struct pipe_buffer *);
 extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
 				      splice_direct_actor *);

@ -83,4 +85,5 @@ extern void splice_shrink_spd(struct splice_pipe_desc *);
 extern void spd_release_page(struct splice_pipe_desc *, unsigned int);

 extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
+extern const struct pipe_buf_operations default_pipe_buf_ops;
 #endif

include/linux/uio.h
@ -13,6 +13,7 @@
 #include <uapi/linux/uio.h>

 struct page;
+struct pipe_inode_info;

 struct kvec {
 	void *iov_base; /* and that should *never* hold a userland pointer */

@ -23,6 +24,7 @@ enum {
 	ITER_IOVEC = 0,
 	ITER_KVEC = 2,
 	ITER_BVEC = 4,
+	ITER_PIPE = 8,
 };

 struct iov_iter {

@ -33,8 +35,12 @@ struct iov_iter {
 		const struct iovec *iov;
 		const struct kvec *kvec;
 		const struct bio_vec *bvec;
+		struct pipe_inode_info *pipe;
 	};
-	unsigned long nr_segs;
+	union {
+		unsigned long nr_segs;
+		int idx;
+	};
 };

 /*

@ -64,7 +70,7 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
 }

 #define iov_for_each(iov, iter, start) \
-	if (!((start).type & ITER_BVEC)) \
+	if (!((start).type & (ITER_BVEC | ITER_PIPE))) \
 	for (iter = (start); \
 	     (iter).count && \
 	     ((iov = iov_iter_iovec(&(iter))), 1); \

@ -94,6 +100,8 @@ void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
 			unsigned long nr_segs, size_t count);
 void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec,
 			unsigned long nr_segs, size_t count);
+void iov_iter_pipe(struct iov_iter *i, int direction, struct pipe_inode_info *pipe,
+			size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
 			size_t maxsize, unsigned maxpages, size_t *start);
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,

@ -109,7 +117,7 @@ static inline size_t iov_iter_count(struct iov_iter *i)

 static inline bool iter_is_iovec(struct iov_iter *i)
 {
-	return !(i->type & (ITER_BVEC | ITER_KVEC));
+	return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE));
 }

 /*

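For orientation (commentary, not part of the diff): a pipe-backed iter is built with iov_iter_pipe() and handed straight to ->read_iter(), so the filesystem's normal read path deposits data directly into pipe buffers. A minimal sketch of the call pattern this enables, mirroring the shape of the new generic_file_splice_read() with error handling and pipe-full accounting omitted:

	struct iov_iter to;
	struct kiocb kiocb;
	ssize_t ret;

	iov_iter_pipe(&to, ITER_PIPE, pipe, len);	/* destination: the pipe */
	init_sync_kiocb(&kiocb, in);
	kiocb.ki_pos = *ppos;
	ret = in->f_op->read_iter(&kiocb, &to);		/* fills pipe->bufs[] */
	if (ret > 0)
		*ppos = kiocb.ki_pos;
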
kernel/relay.c
@ -1108,51 +1108,23 @@ static size_t relay_file_read_end_pos(struct rchan_buf *buf,
 	return end_pos;
 }

-/*
- *	subbuf_read_actor - read up to one subbuf's worth of data
- */
-static int subbuf_read_actor(size_t read_start,
-			     struct rchan_buf *buf,
-			     size_t avail,
-			     read_descriptor_t *desc)
-{
-	void *from;
-	int ret = 0;
-
-	from = buf->start + read_start;
-	ret = avail;
-	if (copy_to_user(desc->arg.buf, from, avail)) {
-		desc->error = -EFAULT;
-		ret = 0;
-	}
-	desc->arg.data += ret;
-	desc->written += ret;
-	desc->count -= ret;
-
-	return ret;
-}
-
-typedef int (*subbuf_actor_t) (size_t read_start,
-			       struct rchan_buf *buf,
-			       size_t avail,
-			       read_descriptor_t *desc);
-
-/*
- *	relay_file_read_subbufs - read count bytes, bridging subbuf boundaries
- */
-static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
-					subbuf_actor_t subbuf_actor,
-					read_descriptor_t *desc)
+static ssize_t relay_file_read(struct file *filp,
+			       char __user *buffer,
+			       size_t count,
+			       loff_t *ppos)
 {
 	struct rchan_buf *buf = filp->private_data;
 	size_t read_start, avail;
+	size_t written = 0;
 	int ret;

-	if (!desc->count)
+	if (!count)
 		return 0;

 	inode_lock(file_inode(filp));
 	do {
+		void *from;
+
 		if (!relay_file_read_avail(buf, *ppos))
 			break;

@ -1161,32 +1133,22 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
 		if (!avail)
 			break;

-		avail = min(desc->count, avail);
-		ret = subbuf_actor(read_start, buf, avail, desc);
-		if (desc->error < 0)
+		avail = min(count, avail);
+		from = buf->start + read_start;
+		ret = avail;
+		if (copy_to_user(buffer, from, avail))
 			break;

-		if (ret) {
-			relay_file_read_consume(buf, read_start, ret);
-			*ppos = relay_file_read_end_pos(buf, read_start, ret);
-		}
-	} while (desc->count && ret);
+		buffer += ret;
+		written += ret;
+		count -= ret;
+
+		relay_file_read_consume(buf, read_start, ret);
+		*ppos = relay_file_read_end_pos(buf, read_start, ret);
+	} while (count);
 	inode_unlock(file_inode(filp));

-	return desc->written;
-}
-
-static ssize_t relay_file_read(struct file *filp,
-			       char __user *buffer,
-			       size_t count,
-			       loff_t *ppos)
-{
-	read_descriptor_t desc;
-	desc.written = 0;
-	desc.count = count;
-	desc.arg.buf = buffer;
-	desc.error = 0;
-	return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, &desc);
+	return written;
 }

 static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed)

lib/iov_iter.c
@ -3,8 +3,11 @@
 #include <linux/pagemap.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/splice.h>
 #include <net/checksum.h>

+#define PIPE_PARANOIA /* for now */
+
 #define iterate_iovec(i, n, __v, __p, skip, STEP) { \
 	size_t left; \
 	size_t wanted = n; \

@ -290,6 +293,93 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
 	return wanted - bytes;
 }

+#ifdef PIPE_PARANOIA
+static bool sanity(const struct iov_iter *i)
+{
+	struct pipe_inode_info *pipe = i->pipe;
+	int idx = i->idx;
+	int next = pipe->curbuf + pipe->nrbufs;
+	if (i->iov_offset) {
+		struct pipe_buffer *p;
+		if (unlikely(!pipe->nrbufs))
+			goto Bad;	// pipe must be non-empty
+		if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
+			goto Bad;	// must be at the last buffer...
+
+		p = &pipe->bufs[idx];
+		if (unlikely(p->offset + p->len != i->iov_offset))
+			goto Bad;	// ... at the end of segment
+	} else {
+		if (idx != (next & (pipe->buffers - 1)))
+			goto Bad;	// must be right after the last buffer
+	}
+	return true;
+Bad:
+	printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
+	printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
+			pipe->curbuf, pipe->nrbufs, pipe->buffers);
+	for (idx = 0; idx < pipe->buffers; idx++)
+		printk(KERN_ERR "[%p %p %d %d]\n",
+			pipe->bufs[idx].ops,
+			pipe->bufs[idx].page,
+			pipe->bufs[idx].offset,
+			pipe->bufs[idx].len);
+	WARN_ON(1);
+	return false;
+}
+#else
+#define sanity(i) true
+#endif
+
+static inline int next_idx(int idx, struct pipe_inode_info *pipe)
+{
+	return (idx + 1) & (pipe->buffers - 1);
+}
+
+static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	struct pipe_inode_info *pipe = i->pipe;
+	struct pipe_buffer *buf;
+	size_t off;
+	int idx;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	if (!sanity(i))
+		return 0;
+
+	off = i->iov_offset;
+	idx = i->idx;
+	buf = &pipe->bufs[idx];
+	if (off) {
+		if (offset == off && buf->page == page) {
+			/* merge with the last one */
+			buf->len += bytes;
+			i->iov_offset += bytes;
+			goto out;
+		}
+		idx = next_idx(idx, pipe);
+		buf = &pipe->bufs[idx];
+	}
+	if (idx == pipe->curbuf && pipe->nrbufs)
+		return 0;
+	pipe->nrbufs++;
+	buf->ops = &page_cache_pipe_buf_ops;
+	get_page(buf->page = page);
+	buf->offset = offset;
+	buf->len = bytes;
+	i->iov_offset = offset + bytes;
+	i->idx = idx;
+out:
+	i->count -= bytes;
+	return bytes;
+}
+

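One detail worth spelling out (commentary, not part of the diff): pipe->buffers is always a power of two, so next_idx() can wrap the ring index with a mask instead of a modulo. A worked example, assuming a 16-slot ring:

	/* (15 + 1) & (16 - 1) == 0: slot 15 wraps back to slot 0 */
	int idx = next_idx(15, pipe);	/* assuming pipe->buffers == 16 */
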
 /*
  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
  * bytes. For each iovec, fault in each page that constitutes the iovec.

@ -356,9 +446,98 @@ static void memzero_page(struct page *page, size_t offset, size_t len)
 	kunmap_atomic(addr);
 }

+static inline bool allocated(struct pipe_buffer *buf)
+{
+	return buf->ops == &default_pipe_buf_ops;
+}
+
+static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
+{
+	size_t off = i->iov_offset;
+	int idx = i->idx;
+	if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
+		idx = next_idx(idx, i->pipe);
+		off = 0;
+	}
+	*idxp = idx;
+	*offp = off;
+}
+
+static size_t push_pipe(struct iov_iter *i, size_t size,
+			int *idxp, size_t *offp)
+{
+	struct pipe_inode_info *pipe = i->pipe;
+	size_t off;
+	int idx;
+	ssize_t left;
+
+	if (unlikely(size > i->count))
+		size = i->count;
+	if (unlikely(!size))
+		return 0;
+
+	left = size;
+	data_start(i, &idx, &off);
+	*idxp = idx;
+	*offp = off;
+	if (off) {
+		left -= PAGE_SIZE - off;
+		if (left <= 0) {
+			pipe->bufs[idx].len += size;
+			return size;
+		}
+		pipe->bufs[idx].len = PAGE_SIZE;
+		idx = next_idx(idx, pipe);
+	}
+	while (idx != pipe->curbuf || !pipe->nrbufs) {
+		struct page *page = alloc_page(GFP_USER);
+		if (!page)
+			break;
+		pipe->nrbufs++;
+		pipe->bufs[idx].ops = &default_pipe_buf_ops;
+		pipe->bufs[idx].page = page;
+		pipe->bufs[idx].offset = 0;
+		if (left <= PAGE_SIZE) {
+			pipe->bufs[idx].len = left;
+			return size;
+		}
+		pipe->bufs[idx].len = PAGE_SIZE;
+		left -= PAGE_SIZE;
+		idx = next_idx(idx, pipe);
+	}
+	return size - left;
+}
+
+static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
+				struct iov_iter *i)
+{
+	struct pipe_inode_info *pipe = i->pipe;
+	size_t n, off;
+	int idx;
+
+	if (!sanity(i))
+		return 0;
+
+	bytes = n = push_pipe(i, bytes, &idx, &off);
+	if (unlikely(!n))
+		return 0;
+	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
+		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
+		memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
+		i->idx = idx;
+		i->iov_offset = off + chunk;
+		n -= chunk;
+		addr += chunk;
+	}
+	i->count -= bytes;
+	return bytes;
+}
+
 size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	const char *from = addr;
+	if (unlikely(i->type & ITER_PIPE))
+		return copy_pipe_to_iter(addr, bytes, i);
 	iterate_and_advance(i, bytes, v,
 		__copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
 			       v.iov_len),

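Commentary on the helper pair above (not part of the diff): push_pipe() reserves capacity, first topping up a partially filled tail page and then allocating fresh GFP_USER pages until either the ring is full or the request is covered, and it reports how much of `size` it actually secured; copy_pipe_to_iter() then fills exactly that span. The caller-side contract, in sketch form:

	size_t n, off;
	int idx;

	n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;	/* ring already full, or page allocation failed */
	/* slots idx, idx+1, ... now hold n bytes of room, starting at off */
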
@ -374,6 +553,10 @@ EXPORT_SYMBOL(copy_to_iter);
 size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
+	if (unlikely(i->type & ITER_PIPE)) {
+		WARN_ON(1);
+		return 0;
+	}
 	iterate_and_advance(i, bytes, v,
 		__copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
 				 v.iov_len),

@ -389,6 +572,10 @@ EXPORT_SYMBOL(copy_from_iter);
 size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
+	if (unlikely(i->type & ITER_PIPE)) {
+		WARN_ON(1);
+		return 0;
+	}
 	iterate_and_advance(i, bytes, v,
 		__copy_from_user_nocache((to += v.iov_len) - v.iov_len,
 					 v.iov_base, v.iov_len),

@ -409,14 +596,20 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
 		kunmap_atomic(kaddr);
 		return wanted;
-	} else
+	} else if (likely(!(i->type & ITER_PIPE)))
 		return copy_page_to_iter_iovec(page, offset, bytes, i);
+	else
+		return copy_page_to_iter_pipe(page, offset, bytes, i);
 }
 EXPORT_SYMBOL(copy_page_to_iter);

 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 			   struct iov_iter *i)
 {
+	if (unlikely(i->type & ITER_PIPE)) {
+		WARN_ON(1);
+		return 0;
+	}
 	if (i->type & (ITER_BVEC|ITER_KVEC)) {
 		void *kaddr = kmap_atomic(page);
 		size_t wanted = copy_from_iter(kaddr + offset, bytes, i);

@ -427,8 +620,34 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 }
 EXPORT_SYMBOL(copy_page_from_iter);

+static size_t pipe_zero(size_t bytes, struct iov_iter *i)
+{
+	struct pipe_inode_info *pipe = i->pipe;
+	size_t n, off;
+	int idx;
+
+	if (!sanity(i))
+		return 0;
+
+	bytes = n = push_pipe(i, bytes, &idx, &off);
+	if (unlikely(!n))
+		return 0;
+
+	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
+		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
+		memzero_page(pipe->bufs[idx].page, off, chunk);
+		i->idx = idx;
+		i->iov_offset = off + chunk;
+		n -= chunk;
+	}
+	i->count -= bytes;
+	return bytes;
+}
+
 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
 {
+	if (unlikely(i->type & ITER_PIPE))
+		return pipe_zero(bytes, i);
 	iterate_and_advance(i, bytes, v,
 		__clear_user(v.iov_base, v.iov_len),
 		memzero_page(v.bv_page, v.bv_offset, v.bv_len),

@ -443,6 +662,11 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 		struct iov_iter *i, unsigned long offset, size_t bytes)
 {
 	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
+	if (unlikely(i->type & ITER_PIPE)) {
+		kunmap_atomic(kaddr);
+		WARN_ON(1);
+		return 0;
+	}
 	iterate_all_kinds(i, bytes, v,
 		__copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
 					  v.iov_base, v.iov_len),

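A pattern worth naming (commentary, not part of the diff): a pipe-backed iter only makes sense as a read destination, so every primitive that would treat it as a data source warns once and reports zero bytes copied, which callers end up seeing as a fault. A sketch of what a hypothetical misuse looks like, with kbuf and pipe_iter as placeholder names:

	/* pipe_iter has ITER_PIPE set; this is the wrong direction */
	size_t n = copy_from_iter(kbuf, len, &pipe_iter);
	/* n == 0 and a WARN fires, instead of silently corrupting the ring */
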
@ -455,8 +679,49 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 }
 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

+static void pipe_advance(struct iov_iter *i, size_t size)
+{
+	struct pipe_inode_info *pipe = i->pipe;
+	struct pipe_buffer *buf;
+	int idx = i->idx;
+	size_t off = i->iov_offset;
+
+	if (unlikely(i->count < size))
+		size = i->count;
+
+	if (size) {
+		if (off) /* make it relative to the beginning of buffer */
+			size += off - pipe->bufs[idx].offset;
+		while (1) {
+			buf = &pipe->bufs[idx];
+			if (size <= buf->len)
+				break;
+			size -= buf->len;
+			idx = next_idx(idx, pipe);
+		}
+		buf->len = size;
+		i->idx = idx;
+		off = i->iov_offset = buf->offset + size;
+	}
+	if (off)
+		idx = next_idx(idx, pipe);
+	if (pipe->nrbufs) {
+		int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+		/* [curbuf,unused) is in use.  Free [idx,unused) */
+		while (idx != unused) {
+			pipe_buf_release(pipe, &pipe->bufs[idx]);
+			idx = next_idx(idx, pipe);
+			pipe->nrbufs--;
+		}
+	}
+}
+
 void iov_iter_advance(struct iov_iter *i, size_t size)
 {
+	if (unlikely(i->type & ITER_PIPE)) {
+		pipe_advance(i, size);
+		return;
+	}
 	iterate_and_advance(i, size, v, 0, 0, 0)
 }
 EXPORT_SYMBOL(iov_iter_advance);

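Worth noting (commentary, not part of the diff): for ITER_PIPE, iov_iter_advance() doubles as the cleanup primitive, since pipe_advance() releases every buffer past the new end position. An error path in a splice_read can therefore rewind the iter to where it started and advance by zero to drop everything it produced; a sketch of that pattern, with `to` and `idx` as in a typical caller:

	/* on error: rewind, then free whatever was already put in the pipe */
	to.idx = idx;			/* index saved before ->read_iter() */
	to.iov_offset = 0;
	iov_iter_advance(&to, 0);	/* pipe_advance() drops [idx, end) */
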
@ -466,6 +731,8 @@ EXPORT_SYMBOL(iov_iter_advance);
 */
 size_t iov_iter_single_seg_count(const struct iov_iter *i)
 {
+	if (unlikely(i->type & ITER_PIPE))
+		return i->count;	// it is a silly place, anyway
 	if (i->nr_segs == 1)
 		return i->count;
 	else if (i->type & ITER_BVEC)

@ -501,6 +768,19 @@ void iov_iter_bvec(struct iov_iter *i, int direction,
 }
 EXPORT_SYMBOL(iov_iter_bvec);

+void iov_iter_pipe(struct iov_iter *i, int direction,
+			struct pipe_inode_info *pipe,
+			size_t count)
+{
+	BUG_ON(direction != ITER_PIPE);
+	i->type = direction;
+	i->pipe = pipe;
+	i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+	i->iov_offset = 0;
+	i->count = count;
+}
+EXPORT_SYMBOL(iov_iter_pipe);
+
 unsigned long iov_iter_alignment(const struct iov_iter *i)
 {
 	unsigned long res = 0;

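Commentary on the constructor just added (not part of the diff): the starting index is the slot one past the last occupied buffer, i.e. the first slot the iter is allowed to fill, and iov_offset == 0 encodes "nothing produced yet". A worked example of the index math:

	/* curbuf == 3, nrbufs == 2, buffers == 16: slots 3 and 4 occupied */
	int idx = (3 + 2) & (16 - 1);	/* == 5, the first free slot */
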
@ -509,6 +789,11 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
 	if (!size)
 		return 0;

+	if (unlikely(i->type & ITER_PIPE)) {
+		if (i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
+			return size | i->iov_offset;
+		return size;
+	}
 	iterate_all_kinds(i, size, v,
 		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
 		res |= v.bv_offset | v.bv_len,

@ -525,6 +810,11 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 	if (!size)
 		return 0;

+	if (unlikely(i->type & ITER_PIPE)) {
+		WARN_ON(1);
+		return ~0U;
+	}
+
 	iterate_all_kinds(i, size, v,
 		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
 			(size != v.iov_len ? size : 0), 0),

@ -537,6 +827,47 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_gap_alignment);

+static inline size_t __pipe_get_pages(struct iov_iter *i,
+				size_t maxsize,
+				struct page **pages,
+				int idx,
+				size_t *start)
+{
+	struct pipe_inode_info *pipe = i->pipe;
+	size_t n = push_pipe(i, maxsize, &idx, start);
+	if (!n)
+		return -EFAULT;
+
+	maxsize = n;
+	n += *start;
+	while (n >= PAGE_SIZE) {
+		get_page(*pages++ = pipe->bufs[idx].page);
+		idx = next_idx(idx, pipe);
+		n -= PAGE_SIZE;
+	}
+
+	return maxsize;
+}
+
+static ssize_t pipe_get_pages(struct iov_iter *i,
+		   struct page **pages, size_t maxsize, unsigned maxpages,
+		   size_t *start)
+{
+	unsigned npages;
+	size_t capacity;
+	int idx;
+
+	if (!sanity(i))
+		return -EFAULT;
+
+	data_start(i, &idx, start);
+	/* some of this one + all after this one */
+	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
+	capacity = min(npages, maxpages) * PAGE_SIZE - *start;
+
+	return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
+}
+
 ssize_t iov_iter_get_pages(struct iov_iter *i,
 		   struct page **pages, size_t maxsize, unsigned maxpages,
 		   size_t *start)

@ -547,6 +878,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 	if (!maxsize)
 		return 0;

+	if (unlikely(i->type & ITER_PIPE))
+		return pipe_get_pages(i, pages, maxsize, maxpages, start);
 	iterate_all_kinds(i, maxsize, v, ({
 		unsigned long addr = (unsigned long)v.iov_base;
 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));

@ -582,6 +915,37 @@ static struct page **get_pages_array(size_t n)
 	return p;
 }

+static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
+		   struct page ***pages, size_t maxsize,
+		   size_t *start)
+{
+	struct page **p;
+	size_t n;
+	int idx;
+	int npages;
+
+	if (!sanity(i))
+		return -EFAULT;
+
+	data_start(i, &idx, start);
+	/* some of this one + all after this one */
+	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
+	n = npages * PAGE_SIZE - *start;
+	if (maxsize > n)
+		maxsize = n;
+	else
+		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
+	p = get_pages_array(npages);
+	if (!p)
+		return -ENOMEM;
+	n = __pipe_get_pages(i, maxsize, p, idx, start);
+	if (n > 0)
+		*pages = p;
+	else
+		kvfree(p);
+	return n;
+}
+
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 		   struct page ***pages, size_t maxsize,
 		   size_t *start)

@ -594,6 +958,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 	if (!maxsize)
 		return 0;

+	if (unlikely(i->type & ITER_PIPE))
+		return pipe_get_pages_alloc(i, pages, maxsize, start);
 	iterate_all_kinds(i, maxsize, v, ({
 		unsigned long addr = (unsigned long)v.iov_base;
 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));

@ -635,6 +1001,10 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
 	__wsum sum, next;
 	size_t off = 0;
 	sum = *csum;
+	if (unlikely(i->type & ITER_PIPE)) {
+		WARN_ON(1);
+		return 0;
+	}
 	iterate_and_advance(i, bytes, v, ({
 		int err = 0;
 		next = csum_and_copy_from_user(v.iov_base,

@ -673,6 +1043,10 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
 	__wsum sum, next;
 	size_t off = 0;
 	sum = *csum;
+	if (unlikely(i->type & ITER_PIPE)) {
+		WARN_ON(1);	/* for now */
+		return 0;
+	}
 	iterate_and_advance(i, bytes, v, ({
 		int err = 0;
 		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,

@ -712,7 +1086,20 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
 	if (!size)
 		return 0;

-	iterate_all_kinds(i, size, v, ({
+	if (unlikely(i->type & ITER_PIPE)) {
+		struct pipe_inode_info *pipe = i->pipe;
+		size_t off;
+		int idx;
+
+		if (!sanity(i))
+			return 0;
+
+		data_start(i, &idx, &off);
+		/* some of this one + all after this one */
+		npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
+		if (npages >= maxpages)
+			return maxpages;
+	} else iterate_all_kinds(i, size, v, ({
 		unsigned long p = (unsigned long)v.iov_base;
 		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
 			- p / PAGE_SIZE;

@ -737,6 +1124,10 @@ EXPORT_SYMBOL(iov_iter_npages);
 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 {
 	*new = *old;
+	if (unlikely(new->type & ITER_PIPE)) {
+		WARN_ON(1);
+		return NULL;
+	}
 	if (new->type & ITER_BVEC)
 		return new->bvec = kmemdup(new->bvec,
 			      new->nr_segs * sizeof(struct bio_vec),

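The ring-distance expression repeated in the pipe paths above, ((curbuf - idx - 1) & (buffers - 1)) + 1, counts the current slot plus everything after it in the free span ("some of this one + all after this one"). A worked example (commentary, not part of the diff), assuming a 16-slot ring with two occupied buffers:

	/* curbuf == 3, nrbufs == 2 (slots 3 and 4 in use), idx == 5:
	 * (3 - 5 - 1) & 15 == 13, plus 1 == 14 usable slots. */
	int npages = ((3 - 5 - 1) & (16 - 1)) + 1;	/* == 14 */
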
mm/shmem.c
@ -2311,119 +2311,6 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	return retval ? retval : error;
 }

-static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
-				struct pipe_inode_info *pipe, size_t len,
-				unsigned int flags)
-{
-	struct address_space *mapping = in->f_mapping;
-	struct inode *inode = mapping->host;
-	unsigned int loff, nr_pages, req_pages;
-	struct page *pages[PIPE_DEF_BUFFERS];
-	struct partial_page partial[PIPE_DEF_BUFFERS];
-	struct page *page;
-	pgoff_t index, end_index;
-	loff_t isize, left;
-	int error, page_nr;
-	struct splice_pipe_desc spd = {
-		.pages = pages,
-		.partial = partial,
-		.nr_pages_max = PIPE_DEF_BUFFERS,
-		.flags = flags,
-		.ops = &page_cache_pipe_buf_ops,
-		.spd_release = spd_release_page,
-	};
-
-	isize = i_size_read(inode);
-	if (unlikely(*ppos >= isize))
-		return 0;
-
-	left = isize - *ppos;
-	if (unlikely(left < len))
-		len = left;
-
-	if (splice_grow_spd(pipe, &spd))
-		return -ENOMEM;
-
-	index = *ppos >> PAGE_SHIFT;
-	loff = *ppos & ~PAGE_MASK;
-	req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	nr_pages = min(req_pages, spd.nr_pages_max);
-
-	spd.nr_pages = find_get_pages_contig(mapping, index,
-						nr_pages, spd.pages);
-	index += spd.nr_pages;
-	error = 0;
-
-	while (spd.nr_pages < nr_pages) {
-		error = shmem_getpage(inode, index, &page, SGP_CACHE);
-		if (error)
-			break;
-		unlock_page(page);
-		spd.pages[spd.nr_pages++] = page;
-		index++;
-	}
-
-	index = *ppos >> PAGE_SHIFT;
-	nr_pages = spd.nr_pages;
-	spd.nr_pages = 0;
-
-	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
-		unsigned int this_len;
-
-		if (!len)
-			break;
-
-		this_len = min_t(unsigned long, len, PAGE_SIZE - loff);
-		page = spd.pages[page_nr];
-
-		if (!PageUptodate(page) || page->mapping != mapping) {
-			error = shmem_getpage(inode, index, &page, SGP_CACHE);
-			if (error)
-				break;
-			unlock_page(page);
-			put_page(spd.pages[page_nr]);
-			spd.pages[page_nr] = page;
-		}
-
-		isize = i_size_read(inode);
-		end_index = (isize - 1) >> PAGE_SHIFT;
-		if (unlikely(!isize || index > end_index))
-			break;
-
-		if (end_index == index) {
-			unsigned int plen;
-
-			plen = ((isize - 1) & ~PAGE_MASK) + 1;
-			if (plen <= loff)
-				break;
-
-			this_len = min(this_len, plen - loff);
-			len = this_len;
-		}
-
-		spd.partial[page_nr].offset = loff;
-		spd.partial[page_nr].len = this_len;
-		len -= this_len;
-		loff = 0;
-		spd.nr_pages++;
-		index++;
-	}
-
-	while (page_nr < nr_pages)
-		put_page(spd.pages[page_nr++]);
-
-	if (spd.nr_pages)
-		error = splice_to_pipe(pipe, &spd);
-
-	splice_shrink_spd(&spd);
-
-	if (error > 0) {
-		*ppos += error;
-		file_accessed(in);
-	}
-	return error;
-}
-
 /*
  * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
  */

@ -3786,7 +3673,7 @@ static const struct file_operations shmem_file_operations = {
 	.read_iter	= shmem_file_read_iter,
 	.write_iter	= generic_file_write_iter,
 	.fsync		= noop_fsync,
-	.splice_read	= shmem_file_splice_read,
+	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.fallocate	= shmem_fallocate,
 #endif

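The pattern generalizes (commentary, not part of the diff): any filesystem with a working ->read_iter() can drop its bespoke ->splice_read() and point at the generic one, exactly as shmem does above. A sketch, with my_fops and my_file_read_iter as hypothetical placeholder names:

	static const struct file_operations my_fops = {
		.read_iter	= my_file_read_iter,		/* hypothetical */
		.splice_read	= generic_file_splice_read,	/* no custom code */
	};
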
net/core/skbuff.c
@ -1962,37 +1962,13 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
 	return false;
 }

-ssize_t skb_socket_splice(struct sock *sk,
-			  struct pipe_inode_info *pipe,
-			  struct splice_pipe_desc *spd)
-{
-	int ret;
-
-	/* Drop the socket lock, otherwise we have reverse
-	 * locking dependencies between sk_lock and i_mutex
-	 * here as compared to sendfile(). We enter here
-	 * with the socket lock held, and splice_to_pipe() will
-	 * grab the pipe inode lock. For sendfile() emulation,
-	 * we call into ->sendpage() with the i_mutex lock held
-	 * and networking will grab the socket lock.
-	 */
-	release_sock(sk);
-	ret = splice_to_pipe(pipe, spd);
-	lock_sock(sk);
-
-	return ret;
-}
-
 /*
  * Map data from the skb to a pipe. Should handle both the linear part,
  * the fragments, and the frag list.
  */
 int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
 		    struct pipe_inode_info *pipe, unsigned int tlen,
-		    unsigned int flags,
-		    ssize_t (*splice_cb)(struct sock *,
-					 struct pipe_inode_info *,
-					 struct splice_pipe_desc *))
+		    unsigned int flags)
 {
 	struct partial_page partial[MAX_SKB_FRAGS];
 	struct page *pages[MAX_SKB_FRAGS];

@ -2009,7 +1985,7 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
 	__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk);

 	if (spd.nr_pages)
-		ret = splice_cb(sk, pipe, &spd);
+		ret = splice_to_pipe(pipe, &spd);

 	return ret;
 }

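To make the call-site change concrete (commentary, not part of the diff): splice_to_pipe() no longer takes pipe_lock itself, since the lock was lifted into its callers earlier in this series, so the per-protocol unlock/relock trampolines lose their reason to exist and the callback argument disappears. The before/after at a call site such as TCP's:

	/* before: each protocol passed a lock-juggling callback */
	ret = skb_splice_bits(skb, sk, offset, pipe, len, flags,
			      skb_socket_splice);

	/* after: a plain call; no sk_lock vs. pipe_lock inversion to dodge */
	ret = skb_splice_bits(skb, sk, offset, pipe, len, flags);
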
net/ipv4/tcp.c
@ -691,8 +691,7 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
 	int ret;

 	ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe,
-			      min(rd_desc->count, len), tss->flags,
-			      skb_socket_splice);
+			      min(rd_desc->count, len), tss->flags);
 	if (ret > 0)
 		rd_desc->count -= ret;
 	return ret;

net/kcm/kcmsock.c
@ -1160,19 +1160,6 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
 	return copied ? : err;
 }

-static ssize_t kcm_sock_splice(struct sock *sk,
-			       struct pipe_inode_info *pipe,
-			       struct splice_pipe_desc *spd)
-{
-	int ret;
-
-	release_sock(sk);
-	ret = splice_to_pipe(pipe, spd);
-	lock_sock(sk);
-
-	return ret;
-}
-
 static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
 			       struct pipe_inode_info *pipe, size_t len,
 			       unsigned int flags)

@ -1202,8 +1189,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
 	if (len > rxm->full_len)
 		len = rxm->full_len;

-	copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags,
-				 kcm_sock_splice);
+	copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags);
 	if (copied < 0) {
 		err = copied;
 		goto err_out;

net/unix/af_unix.c
@ -2475,28 +2475,13 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
 	return unix_stream_read_generic(&state);
 }

-static ssize_t skb_unix_socket_splice(struct sock *sk,
-				      struct pipe_inode_info *pipe,
-				      struct splice_pipe_desc *spd)
-{
-	int ret;
-	struct unix_sock *u = unix_sk(sk);
-
-	mutex_unlock(&u->iolock);
-	ret = splice_to_pipe(pipe, spd);
-	mutex_lock(&u->iolock);
-
-	return ret;
-}
-
 static int unix_stream_splice_actor(struct sk_buff *skb,
 				    int skip, int chunk,
 				    struct unix_stream_read_state *state)
 {
 	return skb_splice_bits(skb, state->socket->sk,
 			       UNIXCB(skb).consumed + skip,
-			       state->pipe, chunk, state->splice_flags,
-			       skb_unix_socket_splice);
+			       state->pipe, chunk, state->splice_flags);
 }

 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,

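Stepping back (commentary, not part of the diff): the userspace-visible operation all of this serves is unchanged. A self-contained example of the syscall path that ends up in ->splice_read():

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <unistd.h>

	/* Move up to 64 KiB from fd into a freshly created pipe without
	 * bouncing the data through user memory. */
	static ssize_t spill_to_pipe(int fd)
	{
		int pfd[2];
		ssize_t n = -1;

		if (pipe(pfd) == 0) {
			n = splice(fd, NULL, pfd[1], NULL, 65536, SPLICE_F_MOVE);
			close(pfd[0]);
			close(pfd[1]);
		}
		return n;
	}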