overlayfs fixes for 4.19-rc4
-----BEGIN PGP SIGNATURE----- iHUEABYIAB0WIQSQHSd0lITzzeNWNm3h3BK/laaZPAUCW5qpOgAKCRDh3BK/laaZ PDCQAQCIKLg0aLeWOkfUO76mBjlp5srKgJfrqpFoyuozD6l2fQEAl/W2x9NOduV+ PK4sCYMT8SpI0hMrbv9P4zZ683kmaA8= =RnZU -----END PGP SIGNATURE----- Merge tag 'ovl-fixes-4.19-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs Pull overlayfs fixes from Miklos Szeredi: "This fixes a regression in the recent file stacking update, reported and fixed by Amir Goldstein. The fix is fairly trivial, but involves adding a fadvise() f_op and the associated churn in the vfs. As discussed on -fsdevel, there are other possible uses for this method, than allowing proper stacking for overlays. And there's one other fix for a syzkaller detected oops" * tag 'ovl-fixes-4.19-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs: ovl: fix oopses in ovl_fill_super() failure paths ovl: add ovl_fadvise() vfs: implement readahead(2) using POSIX_FADV_WILLNEED vfs: add the fadvise() file operation Documentation/filesystems: update documentation of file_operations ovl: fix GPF in swapfile_activate of file from overlayfs over xfs ovl: respect FIEMAP_FLAG_SYNC flag
This commit is contained in:
commit
48751b562b
8 changed files with 134 additions and 80 deletions
|
@ -848,7 +848,7 @@ struct file_operations
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
This describes how the VFS can manipulate an open file. As of kernel
|
This describes how the VFS can manipulate an open file. As of kernel
|
||||||
4.1, the following members are defined:
|
4.18, the following members are defined:
|
||||||
|
|
||||||
struct file_operations {
|
struct file_operations {
|
||||||
struct module *owner;
|
struct module *owner;
|
||||||
|
@ -858,11 +858,11 @@ struct file_operations {
|
||||||
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
|
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
|
||||||
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
|
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
|
||||||
int (*iterate) (struct file *, struct dir_context *);
|
int (*iterate) (struct file *, struct dir_context *);
|
||||||
|
int (*iterate_shared) (struct file *, struct dir_context *);
|
||||||
__poll_t (*poll) (struct file *, struct poll_table_struct *);
|
__poll_t (*poll) (struct file *, struct poll_table_struct *);
|
||||||
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
|
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
|
||||||
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
|
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
|
||||||
int (*mmap) (struct file *, struct vm_area_struct *);
|
int (*mmap) (struct file *, struct vm_area_struct *);
|
||||||
int (*mremap)(struct file *, struct vm_area_struct *);
|
|
||||||
int (*open) (struct inode *, struct file *);
|
int (*open) (struct inode *, struct file *);
|
||||||
int (*flush) (struct file *, fl_owner_t id);
|
int (*flush) (struct file *, fl_owner_t id);
|
||||||
int (*release) (struct inode *, struct file *);
|
int (*release) (struct inode *, struct file *);
|
||||||
|
@ -882,6 +882,10 @@ struct file_operations {
|
||||||
#ifndef CONFIG_MMU
|
#ifndef CONFIG_MMU
|
||||||
unsigned (*mmap_capabilities)(struct file *);
|
unsigned (*mmap_capabilities)(struct file *);
|
||||||
#endif
|
#endif
|
||||||
|
ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
|
||||||
|
int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
|
||||||
|
int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
|
||||||
|
int (*fadvise)(struct file *, loff_t, loff_t, int);
|
||||||
};
|
};
|
||||||
|
|
||||||
Again, all methods are called without any locks being held, unless
|
Again, all methods are called without any locks being held, unless
|
||||||
|
@ -899,6 +903,9 @@ otherwise noted.
|
||||||
|
|
||||||
iterate: called when the VFS needs to read the directory contents
|
iterate: called when the VFS needs to read the directory contents
|
||||||
|
|
||||||
|
iterate_shared: called when the VFS needs to read the directory contents
|
||||||
|
when filesystem supports concurrent dir iterators
|
||||||
|
|
||||||
poll: called by the VFS when a process wants to check if there is
|
poll: called by the VFS when a process wants to check if there is
|
||||||
activity on this file and (optionally) go to sleep until there
|
activity on this file and (optionally) go to sleep until there
|
||||||
is activity. Called by the select(2) and poll(2) system calls
|
is activity. Called by the select(2) and poll(2) system calls
|
||||||
|
@ -951,6 +958,16 @@ otherwise noted.
|
||||||
|
|
||||||
fallocate: called by the VFS to preallocate blocks or punch a hole.
|
fallocate: called by the VFS to preallocate blocks or punch a hole.
|
||||||
|
|
||||||
|
copy_file_range: called by the copy_file_range(2) system call.
|
||||||
|
|
||||||
|
clone_file_range: called by the ioctl(2) system call for FICLONERANGE and
|
||||||
|
FICLONE commands.
|
||||||
|
|
||||||
|
dedupe_file_range: called by the ioctl(2) system call for FIDEDUPERANGE
|
||||||
|
command.
|
||||||
|
|
||||||
|
fadvise: possibly called by the fadvise64() system call.
|
||||||
|
|
||||||
Note that the file operations are implemented by the specific
|
Note that the file operations are implemented by the specific
|
||||||
filesystem in which the inode resides. When opening a device node
|
filesystem in which the inode resides. When opening a device node
|
||||||
(character or block special) most filesystems will call special
|
(character or block special) most filesystems will call special
|
||||||
|
|
|
@ -131,9 +131,6 @@ static int ovl_open(struct inode *inode, struct file *file)
|
||||||
if (IS_ERR(realfile))
|
if (IS_ERR(realfile))
|
||||||
return PTR_ERR(realfile);
|
return PTR_ERR(realfile);
|
||||||
|
|
||||||
/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
|
|
||||||
file->f_mapping = realfile->f_mapping;
|
|
||||||
|
|
||||||
file->private_data = realfile;
|
file->private_data = realfile;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -334,6 +331,25 @@ static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
|
||||||
|
{
|
||||||
|
struct fd real;
|
||||||
|
const struct cred *old_cred;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = ovl_real_fdget(file, &real);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
||||||
|
ret = vfs_fadvise(real.file, offset, len, advice);
|
||||||
|
revert_creds(old_cred);
|
||||||
|
|
||||||
|
fdput(real);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static long ovl_real_ioctl(struct file *file, unsigned int cmd,
|
static long ovl_real_ioctl(struct file *file, unsigned int cmd,
|
||||||
unsigned long arg)
|
unsigned long arg)
|
||||||
{
|
{
|
||||||
|
@ -502,6 +518,7 @@ const struct file_operations ovl_file_operations = {
|
||||||
.fsync = ovl_fsync,
|
.fsync = ovl_fsync,
|
||||||
.mmap = ovl_mmap,
|
.mmap = ovl_mmap,
|
||||||
.fallocate = ovl_fallocate,
|
.fallocate = ovl_fallocate,
|
||||||
|
.fadvise = ovl_fadvise,
|
||||||
.unlocked_ioctl = ovl_ioctl,
|
.unlocked_ioctl = ovl_ioctl,
|
||||||
.compat_ioctl = ovl_compat_ioctl,
|
.compat_ioctl = ovl_compat_ioctl,
|
||||||
|
|
||||||
|
|
|
@ -467,6 +467,10 @@ static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||||
return -EOPNOTSUPP;
|
return -EOPNOTSUPP;
|
||||||
|
|
||||||
old_cred = ovl_override_creds(inode->i_sb);
|
old_cred = ovl_override_creds(inode->i_sb);
|
||||||
|
|
||||||
|
if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
|
||||||
|
filemap_write_and_wait(realinode->i_mapping);
|
||||||
|
|
||||||
err = realinode->i_op->fiemap(realinode, fieinfo, start, len);
|
err = realinode->i_op->fiemap(realinode, fieinfo, start, len);
|
||||||
revert_creds(old_cred);
|
revert_creds(old_cred);
|
||||||
|
|
||||||
|
@ -500,6 +504,11 @@ static const struct inode_operations ovl_special_inode_operations = {
|
||||||
.update_time = ovl_update_time,
|
.update_time = ovl_update_time,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const struct address_space_operations ovl_aops = {
|
||||||
|
/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
|
||||||
|
.direct_IO = noop_direct_IO,
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* It is possible to stack overlayfs instance on top of another
|
* It is possible to stack overlayfs instance on top of another
|
||||||
* overlayfs instance as lower layer. We need to annotate the
|
* overlayfs instance as lower layer. We need to annotate the
|
||||||
|
@ -571,6 +580,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
|
||||||
case S_IFREG:
|
case S_IFREG:
|
||||||
inode->i_op = &ovl_file_inode_operations;
|
inode->i_op = &ovl_file_inode_operations;
|
||||||
inode->i_fop = &ovl_file_operations;
|
inode->i_fop = &ovl_file_operations;
|
||||||
|
inode->i_mapping->a_ops = &ovl_aops;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case S_IFDIR:
|
case S_IFDIR:
|
||||||
|
|
|
@ -982,16 +982,6 @@ static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath)
|
||||||
if (err)
|
if (err)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
err = -EBUSY;
|
|
||||||
if (ovl_inuse_trylock(upperpath->dentry)) {
|
|
||||||
ofs->upperdir_locked = true;
|
|
||||||
} else if (ofs->config.index) {
|
|
||||||
pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
|
|
||||||
goto out;
|
|
||||||
} else {
|
|
||||||
pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
upper_mnt = clone_private_mount(upperpath);
|
upper_mnt = clone_private_mount(upperpath);
|
||||||
err = PTR_ERR(upper_mnt);
|
err = PTR_ERR(upper_mnt);
|
||||||
if (IS_ERR(upper_mnt)) {
|
if (IS_ERR(upper_mnt)) {
|
||||||
|
@ -1002,6 +992,17 @@ static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath)
|
||||||
/* Don't inherit atime flags */
|
/* Don't inherit atime flags */
|
||||||
upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
|
upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
|
||||||
ofs->upper_mnt = upper_mnt;
|
ofs->upper_mnt = upper_mnt;
|
||||||
|
|
||||||
|
err = -EBUSY;
|
||||||
|
if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) {
|
||||||
|
ofs->upperdir_locked = true;
|
||||||
|
} else if (ofs->config.index) {
|
||||||
|
pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
|
||||||
|
goto out;
|
||||||
|
} else {
|
||||||
|
pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
|
||||||
|
}
|
||||||
|
|
||||||
err = 0;
|
err = 0;
|
||||||
out:
|
out:
|
||||||
return err;
|
return err;
|
||||||
|
@ -1101,8 +1102,10 @@ static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ofs->workbasedir = dget(workpath.dentry);
|
||||||
|
|
||||||
err = -EBUSY;
|
err = -EBUSY;
|
||||||
if (ovl_inuse_trylock(workpath.dentry)) {
|
if (ovl_inuse_trylock(ofs->workbasedir)) {
|
||||||
ofs->workdir_locked = true;
|
ofs->workdir_locked = true;
|
||||||
} else if (ofs->config.index) {
|
} else if (ofs->config.index) {
|
||||||
pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
|
pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
|
||||||
|
@ -1111,7 +1114,6 @@ static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
|
||||||
pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
|
pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
ofs->workbasedir = dget(workpath.dentry);
|
|
||||||
err = ovl_make_workdir(ofs, &workpath);
|
err = ovl_make_workdir(ofs, &workpath);
|
||||||
if (err)
|
if (err)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
|
@ -1763,6 +1763,7 @@ struct file_operations {
|
||||||
u64);
|
u64);
|
||||||
int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
|
int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
|
||||||
u64);
|
u64);
|
||||||
|
int (*fadvise)(struct file *, loff_t, loff_t, int);
|
||||||
} __randomize_layout;
|
} __randomize_layout;
|
||||||
|
|
||||||
struct inode_operations {
|
struct inode_operations {
|
||||||
|
@ -3459,4 +3460,8 @@ static inline bool dir_relax_shared(struct inode *inode)
|
||||||
extern bool path_noexec(const struct path *path);
|
extern bool path_noexec(const struct path *path);
|
||||||
extern void inode_nohighmem(struct inode *inode);
|
extern void inode_nohighmem(struct inode *inode);
|
||||||
|
|
||||||
|
/* mm/fadvise.c */
|
||||||
|
extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
|
||||||
|
int advice);
|
||||||
|
|
||||||
#endif /* _LINUX_FS_H */
|
#endif /* _LINUX_FS_H */
|
||||||
|
|
|
@ -32,7 +32,7 @@ ifdef CONFIG_CROSS_MEMORY_ATTACH
|
||||||
mmu-$(CONFIG_MMU) += process_vm_access.o
|
mmu-$(CONFIG_MMU) += process_vm_access.o
|
||||||
endif
|
endif
|
||||||
|
|
||||||
obj-y := filemap.o mempool.o oom_kill.o \
|
obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
|
||||||
maccess.o page_alloc.o page-writeback.o \
|
maccess.o page_alloc.o page-writeback.o \
|
||||||
readahead.o swap.o truncate.o vmscan.o shmem.o \
|
readahead.o swap.o truncate.o vmscan.o shmem.o \
|
||||||
util.o mmzone.o vmstat.o backing-dev.o \
|
util.o mmzone.o vmstat.o backing-dev.o \
|
||||||
|
@ -49,7 +49,6 @@ else
|
||||||
obj-y += bootmem.o
|
obj-y += bootmem.o
|
||||||
endif
|
endif
|
||||||
|
|
||||||
obj-$(CONFIG_ADVISE_SYSCALLS) += fadvise.o
|
|
||||||
ifdef CONFIG_MMU
|
ifdef CONFIG_MMU
|
||||||
obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o
|
obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o
|
||||||
endif
|
endif
|
||||||
|
|
81
mm/fadvise.c
81
mm/fadvise.c
|
@ -27,9 +27,9 @@
|
||||||
* deactivate the pages and clear PG_Referenced.
|
* deactivate the pages and clear PG_Referenced.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
|
static int generic_fadvise(struct file *file, loff_t offset, loff_t len,
|
||||||
|
int advice)
|
||||||
{
|
{
|
||||||
struct fd f = fdget(fd);
|
|
||||||
struct inode *inode;
|
struct inode *inode;
|
||||||
struct address_space *mapping;
|
struct address_space *mapping;
|
||||||
struct backing_dev_info *bdi;
|
struct backing_dev_info *bdi;
|
||||||
|
@ -37,22 +37,14 @@ int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
|
||||||
pgoff_t start_index;
|
pgoff_t start_index;
|
||||||
pgoff_t end_index;
|
pgoff_t end_index;
|
||||||
unsigned long nrpages;
|
unsigned long nrpages;
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
if (!f.file)
|
inode = file_inode(file);
|
||||||
return -EBADF;
|
if (S_ISFIFO(inode->i_mode))
|
||||||
|
return -ESPIPE;
|
||||||
|
|
||||||
inode = file_inode(f.file);
|
mapping = file->f_mapping;
|
||||||
if (S_ISFIFO(inode->i_mode)) {
|
if (!mapping || len < 0)
|
||||||
ret = -ESPIPE;
|
return -EINVAL;
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
mapping = f.file->f_mapping;
|
|
||||||
if (!mapping || len < 0) {
|
|
||||||
ret = -EINVAL;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
bdi = inode_to_bdi(mapping->host);
|
bdi = inode_to_bdi(mapping->host);
|
||||||
|
|
||||||
|
@ -67,9 +59,9 @@ int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
|
||||||
/* no bad return value, but ignore advice */
|
/* no bad return value, but ignore advice */
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
ret = -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
goto out;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -85,21 +77,21 @@ int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
|
||||||
|
|
||||||
switch (advice) {
|
switch (advice) {
|
||||||
case POSIX_FADV_NORMAL:
|
case POSIX_FADV_NORMAL:
|
||||||
f.file->f_ra.ra_pages = bdi->ra_pages;
|
file->f_ra.ra_pages = bdi->ra_pages;
|
||||||
spin_lock(&f.file->f_lock);
|
spin_lock(&file->f_lock);
|
||||||
f.file->f_mode &= ~FMODE_RANDOM;
|
file->f_mode &= ~FMODE_RANDOM;
|
||||||
spin_unlock(&f.file->f_lock);
|
spin_unlock(&file->f_lock);
|
||||||
break;
|
break;
|
||||||
case POSIX_FADV_RANDOM:
|
case POSIX_FADV_RANDOM:
|
||||||
spin_lock(&f.file->f_lock);
|
spin_lock(&file->f_lock);
|
||||||
f.file->f_mode |= FMODE_RANDOM;
|
file->f_mode |= FMODE_RANDOM;
|
||||||
spin_unlock(&f.file->f_lock);
|
spin_unlock(&file->f_lock);
|
||||||
break;
|
break;
|
||||||
case POSIX_FADV_SEQUENTIAL:
|
case POSIX_FADV_SEQUENTIAL:
|
||||||
f.file->f_ra.ra_pages = bdi->ra_pages * 2;
|
file->f_ra.ra_pages = bdi->ra_pages * 2;
|
||||||
spin_lock(&f.file->f_lock);
|
spin_lock(&file->f_lock);
|
||||||
f.file->f_mode &= ~FMODE_RANDOM;
|
file->f_mode &= ~FMODE_RANDOM;
|
||||||
spin_unlock(&f.file->f_lock);
|
spin_unlock(&file->f_lock);
|
||||||
break;
|
break;
|
||||||
case POSIX_FADV_WILLNEED:
|
case POSIX_FADV_WILLNEED:
|
||||||
/* First and last PARTIAL page! */
|
/* First and last PARTIAL page! */
|
||||||
|
@ -115,8 +107,7 @@ int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
|
||||||
* Ignore return value because fadvise() shall return
|
* Ignore return value because fadvise() shall return
|
||||||
* success even if filesystem can't retrieve a hint,
|
* success even if filesystem can't retrieve a hint,
|
||||||
*/
|
*/
|
||||||
force_page_cache_readahead(mapping, f.file, start_index,
|
force_page_cache_readahead(mapping, file, start_index, nrpages);
|
||||||
nrpages);
|
|
||||||
break;
|
break;
|
||||||
case POSIX_FADV_NOREUSE:
|
case POSIX_FADV_NOREUSE:
|
||||||
break;
|
break;
|
||||||
|
@ -183,9 +174,32 @@ int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
ret = -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
out:
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
|
||||||
|
{
|
||||||
|
if (file->f_op->fadvise)
|
||||||
|
return file->f_op->fadvise(file, offset, len, advice);
|
||||||
|
|
||||||
|
return generic_fadvise(file, offset, len, advice);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(vfs_fadvise);
|
||||||
|
|
||||||
|
#ifdef CONFIG_ADVISE_SYSCALLS
|
||||||
|
|
||||||
|
int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
|
||||||
|
{
|
||||||
|
struct fd f = fdget(fd);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (!f.file)
|
||||||
|
return -EBADF;
|
||||||
|
|
||||||
|
ret = vfs_fadvise(f.file, offset, len, advice);
|
||||||
|
|
||||||
fdput(f);
|
fdput(f);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -203,3 +217,4 @@ SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
#include <linux/file.h>
|
#include <linux/file.h>
|
||||||
#include <linux/mm_inline.h>
|
#include <linux/mm_inline.h>
|
||||||
#include <linux/blk-cgroup.h>
|
#include <linux/blk-cgroup.h>
|
||||||
|
#include <linux/fadvise.h>
|
||||||
|
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
|
|
||||||
|
@ -575,24 +576,6 @@ page_cache_async_readahead(struct address_space *mapping,
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(page_cache_async_readahead);
|
EXPORT_SYMBOL_GPL(page_cache_async_readahead);
|
||||||
|
|
||||||
static ssize_t
|
|
||||||
do_readahead(struct address_space *mapping, struct file *filp,
|
|
||||||
pgoff_t index, unsigned long nr)
|
|
||||||
{
|
|
||||||
if (!mapping || !mapping->a_ops)
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Readahead doesn't make sense for DAX inodes, but we don't want it
|
|
||||||
* to report a failure either. Instead, we just return success and
|
|
||||||
* don't do any work.
|
|
||||||
*/
|
|
||||||
if (dax_mapping(mapping))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
return force_page_cache_readahead(mapping, filp, index, nr);
|
|
||||||
}
|
|
||||||
|
|
||||||
ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
|
ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
|
||||||
{
|
{
|
||||||
ssize_t ret;
|
ssize_t ret;
|
||||||
|
@ -600,16 +583,22 @@ ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
|
||||||
|
|
||||||
ret = -EBADF;
|
ret = -EBADF;
|
||||||
f = fdget(fd);
|
f = fdget(fd);
|
||||||
if (f.file) {
|
if (!f.file || !(f.file->f_mode & FMODE_READ))
|
||||||
if (f.file->f_mode & FMODE_READ) {
|
goto out;
|
||||||
struct address_space *mapping = f.file->f_mapping;
|
|
||||||
pgoff_t start = offset >> PAGE_SHIFT;
|
/*
|
||||||
pgoff_t end = (offset + count - 1) >> PAGE_SHIFT;
|
* The readahead() syscall is intended to run only on files
|
||||||
unsigned long len = end - start + 1;
|
* that can execute readahead. If readahead is not possible
|
||||||
ret = do_readahead(mapping, f.file, start, len);
|
* on this file, then we must return -EINVAL.
|
||||||
}
|
*/
|
||||||
|
ret = -EINVAL;
|
||||||
|
if (!f.file->f_mapping || !f.file->f_mapping->a_ops ||
|
||||||
|
!S_ISREG(file_inode(f.file)->i_mode))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED);
|
||||||
|
out:
|
||||||
fdput(f);
|
fdput(f);
|
||||||
}
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue