New kind of open files - "location only".
New flag for open(2) - O_PATH. Semantics:
  * pathname is resolved, but the file itself is _NOT_ opened as far as the filesystem is concerned.
  * almost all operations on the resulting descriptors shall fail with -EBADF. Exceptions are:
      1) operations on descriptors themselves (i.e. close(), dup(), dup2(), dup3(), fcntl(fd, F_DUPFD, ...), fcntl(fd, F_DUPFD_CLOEXEC, ...), fcntl(fd, F_GETFD), fcntl(fd, F_SETFD, ...))
      2) fcntl(fd, F_GETFL), for a common non-destructive way to check if a descriptor is open
      3) "dfd" arguments of ...at(2) syscalls, i.e. the starting points of pathname resolution
  * closing such a descriptor does *NOT* affect dnotify or posix locks.
  * permissions are checked as usual along the way to the file; no permission checks are applied to the file itself. Of course, passing such a descriptor to a syscall will result in permission checks (at the moment that means checking that the starting point of ...at() is a directory and that the caller has exec permissions on it).

fget() and fget_light() return NULL on such descriptors; use of fget_raw() and fget_raw_light() is needed to get them. That protects existing code from dealing with those things.

There are two things still missing (they come in the next commits): one is handling of symlinks (right now we refuse to open them that way; see the next commit for semantics related to those) and another is descriptor passing via SCM_RIGHTS datagrams.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
f2fa2ffc20
commit
1abf0c718f
7 changed files with 119 additions and 17 deletions
37
fs/fcntl.c
37
fs/fcntl.c
|
@ -131,7 +131,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
|
||||||
SYSCALL_DEFINE1(dup, unsigned int, fildes)
|
SYSCALL_DEFINE1(dup, unsigned int, fildes)
|
||||||
{
|
{
|
||||||
int ret = -EBADF;
|
int ret = -EBADF;
|
||||||
struct file *file = fget(fildes);
|
struct file *file = fget_raw(fildes);
|
||||||
|
|
||||||
if (file) {
|
if (file) {
|
||||||
ret = get_unused_fd();
|
ret = get_unused_fd();
|
||||||
|
@ -426,15 +426,35 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int check_fcntl_cmd(unsigned cmd)
|
||||||
|
{
|
||||||
|
switch (cmd) {
|
||||||
|
case F_DUPFD:
|
||||||
|
case F_DUPFD_CLOEXEC:
|
||||||
|
case F_GETFD:
|
||||||
|
case F_SETFD:
|
||||||
|
case F_GETFL:
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
|
SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
|
||||||
{
|
{
|
||||||
struct file *filp;
|
struct file *filp;
|
||||||
long err = -EBADF;
|
long err = -EBADF;
|
||||||
|
|
||||||
filp = fget(fd);
|
filp = fget_raw(fd);
|
||||||
if (!filp)
|
if (!filp)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
|
if (unlikely(filp->f_mode & FMODE_PATH)) {
|
||||||
|
if (!check_fcntl_cmd(cmd)) {
|
||||||
|
fput(filp);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
err = security_file_fcntl(filp, cmd, arg);
|
err = security_file_fcntl(filp, cmd, arg);
|
||||||
if (err) {
|
if (err) {
|
||||||
fput(filp);
|
fput(filp);
|
||||||
|
@ -456,10 +476,17 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
|
||||||
long err;
|
long err;
|
||||||
|
|
||||||
err = -EBADF;
|
err = -EBADF;
|
||||||
filp = fget(fd);
|
filp = fget_raw(fd);
|
||||||
if (!filp)
|
if (!filp)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
|
if (unlikely(filp->f_mode & FMODE_PATH)) {
|
||||||
|
if (!check_fcntl_cmd(cmd)) {
|
||||||
|
fput(filp);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
err = security_file_fcntl(filp, cmd, arg);
|
err = security_file_fcntl(filp, cmd, arg);
|
||||||
if (err) {
|
if (err) {
|
||||||
fput(filp);
|
fput(filp);
|
||||||
|
@ -808,14 +835,14 @@ static int __init fcntl_init(void)
|
||||||
* Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
|
* Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
|
||||||
* is defined as O_NONBLOCK on some platforms and not on others.
|
* is defined as O_NONBLOCK on some platforms and not on others.
|
||||||
*/
|
*/
|
||||||
BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
|
BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
|
||||||
O_RDONLY | O_WRONLY | O_RDWR |
|
O_RDONLY | O_WRONLY | O_RDWR |
|
||||||
O_CREAT | O_EXCL | O_NOCTTY |
|
O_CREAT | O_EXCL | O_NOCTTY |
|
||||||
O_TRUNC | O_APPEND | /* O_NONBLOCK | */
|
O_TRUNC | O_APPEND | /* O_NONBLOCK | */
|
||||||
__O_SYNC | O_DSYNC | FASYNC |
|
__O_SYNC | O_DSYNC | FASYNC |
|
||||||
O_DIRECT | O_LARGEFILE | O_DIRECTORY |
|
O_DIRECT | O_LARGEFILE | O_DIRECTORY |
|
||||||
O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
|
O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
|
||||||
__FMODE_EXEC
|
__FMODE_EXEC | O_PATH
|
||||||
));
|
));
|
||||||
|
|
||||||
fasync_cache = kmem_cache_create("fasync_cache",
|
fasync_cache = kmem_cache_create("fasync_cache",
|
||||||
|
|
|
@ -276,11 +276,10 @@ struct file *fget(unsigned int fd)
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
file = fcheck_files(files, fd);
|
file = fcheck_files(files, fd);
|
||||||
if (file) {
|
if (file) {
|
||||||
if (!atomic_long_inc_not_zero(&file->f_count)) {
|
/* File object ref couldn't be taken */
|
||||||
/* File object ref couldn't be taken */
|
if (file->f_mode & FMODE_PATH ||
|
||||||
rcu_read_unlock();
|
!atomic_long_inc_not_zero(&file->f_count))
|
||||||
return NULL;
|
file = NULL;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
@ -289,6 +288,23 @@ struct file *fget(unsigned int fd)
|
||||||
|
|
||||||
EXPORT_SYMBOL(fget);
|
EXPORT_SYMBOL(fget);
|
||||||
|
|
||||||
|
struct file *fget_raw(unsigned int fd)
|
||||||
|
{
|
||||||
|
struct file *file;
|
||||||
|
struct files_struct *files = current->files;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
file = fcheck_files(files, fd);
|
||||||
|
if (file) {
|
||||||
|
/* File object ref couldn't be taken */
|
||||||
|
if (!atomic_long_inc_not_zero(&file->f_count))
|
||||||
|
file = NULL;
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
return file;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Lightweight file lookup - no refcnt increment if fd table isn't shared.
|
* Lightweight file lookup - no refcnt increment if fd table isn't shared.
|
||||||
*
|
*
|
||||||
|
@ -310,6 +326,33 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
|
||||||
struct file *file;
|
struct file *file;
|
||||||
struct files_struct *files = current->files;
|
struct files_struct *files = current->files;
|
||||||
|
|
||||||
|
*fput_needed = 0;
|
||||||
|
if (atomic_read(&files->count) == 1) {
|
||||||
|
file = fcheck_files(files, fd);
|
||||||
|
if (file && (file->f_mode & FMODE_PATH))
|
||||||
|
file = NULL;
|
||||||
|
} else {
|
||||||
|
rcu_read_lock();
|
||||||
|
file = fcheck_files(files, fd);
|
||||||
|
if (file) {
|
||||||
|
if (!(file->f_mode & FMODE_PATH) &&
|
||||||
|
atomic_long_inc_not_zero(&file->f_count))
|
||||||
|
*fput_needed = 1;
|
||||||
|
else
|
||||||
|
/* Didn't get the reference, someone's freed */
|
||||||
|
file = NULL;
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
return file;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct file *fget_raw_light(unsigned int fd, int *fput_needed)
|
||||||
|
{
|
||||||
|
struct file *file;
|
||||||
|
struct files_struct *files = current->files;
|
||||||
|
|
||||||
*fput_needed = 0;
|
*fput_needed = 0;
|
||||||
if (atomic_read(&files->count) == 1) {
|
if (atomic_read(&files->count) == 1) {
|
||||||
file = fcheck_files(files, fd);
|
file = fcheck_files(files, fd);
|
||||||
|
|
|
@ -1544,7 +1544,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
|
||||||
} else {
|
} else {
|
||||||
struct dentry *dentry;
|
struct dentry *dentry;
|
||||||
|
|
||||||
file = fget_light(dfd, &fput_needed);
|
file = fget_raw_light(dfd, &fput_needed);
|
||||||
retval = -EBADF;
|
retval = -EBADF;
|
||||||
if (!file)
|
if (!file)
|
||||||
goto out_fail;
|
goto out_fail;
|
||||||
|
|
35
fs/open.c
35
fs/open.c
|
@ -669,11 +669,16 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
|
||||||
int (*open)(struct inode *, struct file *),
|
int (*open)(struct inode *, struct file *),
|
||||||
const struct cred *cred)
|
const struct cred *cred)
|
||||||
{
|
{
|
||||||
|
static const struct file_operations empty_fops = {};
|
||||||
struct inode *inode;
|
struct inode *inode;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
|
f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
|
||||||
FMODE_PREAD | FMODE_PWRITE;
|
FMODE_PREAD | FMODE_PWRITE;
|
||||||
|
|
||||||
|
if (unlikely(f->f_flags & O_PATH))
|
||||||
|
f->f_mode = FMODE_PATH;
|
||||||
|
|
||||||
inode = dentry->d_inode;
|
inode = dentry->d_inode;
|
||||||
if (f->f_mode & FMODE_WRITE) {
|
if (f->f_mode & FMODE_WRITE) {
|
||||||
error = __get_file_write_access(inode, mnt);
|
error = __get_file_write_access(inode, mnt);
|
||||||
|
@ -687,9 +692,15 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
|
||||||
f->f_path.dentry = dentry;
|
f->f_path.dentry = dentry;
|
||||||
f->f_path.mnt = mnt;
|
f->f_path.mnt = mnt;
|
||||||
f->f_pos = 0;
|
f->f_pos = 0;
|
||||||
f->f_op = fops_get(inode->i_fop);
|
|
||||||
file_sb_list_add(f, inode->i_sb);
|
file_sb_list_add(f, inode->i_sb);
|
||||||
|
|
||||||
|
if (unlikely(f->f_mode & FMODE_PATH)) {
|
||||||
|
f->f_op = &empty_fops;
|
||||||
|
return f;
|
||||||
|
}
|
||||||
|
|
||||||
|
f->f_op = fops_get(inode->i_fop);
|
||||||
|
|
||||||
error = security_dentry_open(f, cred);
|
error = security_dentry_open(f, cred);
|
||||||
if (error)
|
if (error)
|
||||||
goto cleanup_all;
|
goto cleanup_all;
|
||||||
|
@ -911,9 +922,18 @@ static inline int build_open_flags(int flags, int mode, struct open_flags *op)
|
||||||
if (flags & __O_SYNC)
|
if (flags & __O_SYNC)
|
||||||
flags |= O_DSYNC;
|
flags |= O_DSYNC;
|
||||||
|
|
||||||
op->open_flag = flags;
|
/*
|
||||||
|
* If we have O_PATH in the open flag, then we
|
||||||
|
* cannot have anything other than the below set of flags
|
||||||
|
*/
|
||||||
|
if (flags & O_PATH) {
|
||||||
|
flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
|
||||||
|
acc_mode = 0;
|
||||||
|
} else {
|
||||||
|
acc_mode = MAY_OPEN | ACC_MODE(flags);
|
||||||
|
}
|
||||||
|
|
||||||
acc_mode = MAY_OPEN | ACC_MODE(flags);
|
op->open_flag = flags;
|
||||||
|
|
||||||
/* O_TRUNC implies we need access checks for write permissions */
|
/* O_TRUNC implies we need access checks for write permissions */
|
||||||
if (flags & O_TRUNC)
|
if (flags & O_TRUNC)
|
||||||
|
@ -926,7 +946,8 @@ static inline int build_open_flags(int flags, int mode, struct open_flags *op)
|
||||||
|
|
||||||
op->acc_mode = acc_mode;
|
op->acc_mode = acc_mode;
|
||||||
|
|
||||||
op->intent = LOOKUP_OPEN;
|
op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
|
||||||
|
|
||||||
if (flags & O_CREAT) {
|
if (flags & O_CREAT) {
|
||||||
op->intent |= LOOKUP_CREATE;
|
op->intent |= LOOKUP_CREATE;
|
||||||
if (flags & O_EXCL)
|
if (flags & O_EXCL)
|
||||||
|
@ -1053,8 +1074,10 @@ int filp_close(struct file *filp, fl_owner_t id)
|
||||||
if (filp->f_op && filp->f_op->flush)
|
if (filp->f_op && filp->f_op->flush)
|
||||||
retval = filp->f_op->flush(filp, id);
|
retval = filp->f_op->flush(filp, id);
|
||||||
|
|
||||||
dnotify_flush(filp, id);
|
if (likely(!(filp->f_mode & FMODE_PATH))) {
|
||||||
locks_remove_posix(filp, id);
|
dnotify_flush(filp, id);
|
||||||
|
locks_remove_posix(filp, id);
|
||||||
|
}
|
||||||
fput(filp);
|
fput(filp);
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
|
@ -80,6 +80,10 @@
|
||||||
#define O_SYNC (__O_SYNC|O_DSYNC)
|
#define O_SYNC (__O_SYNC|O_DSYNC)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef O_PATH
|
||||||
|
#define O_PATH 010000000
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef O_NDELAY
|
#ifndef O_NDELAY
|
||||||
#define O_NDELAY O_NONBLOCK
|
#define O_NDELAY O_NONBLOCK
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -29,6 +29,8 @@ static inline void fput_light(struct file *file, int fput_needed)
|
||||||
|
|
||||||
extern struct file *fget(unsigned int fd);
|
extern struct file *fget(unsigned int fd);
|
||||||
extern struct file *fget_light(unsigned int fd, int *fput_needed);
|
extern struct file *fget_light(unsigned int fd, int *fput_needed);
|
||||||
|
extern struct file *fget_raw(unsigned int fd);
|
||||||
|
extern struct file *fget_raw_light(unsigned int fd, int *fput_needed);
|
||||||
extern void set_close_on_exec(unsigned int fd, int flag);
|
extern void set_close_on_exec(unsigned int fd, int flag);
|
||||||
extern void put_filp(struct file *);
|
extern void put_filp(struct file *);
|
||||||
extern int alloc_fd(unsigned start, unsigned flags);
|
extern int alloc_fd(unsigned start, unsigned flags);
|
||||||
|
|
|
@ -102,6 +102,9 @@ struct inodes_stat_t {
|
||||||
/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */
|
/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */
|
||||||
#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
|
#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
|
||||||
|
|
||||||
|
/* File is opened with O_PATH; almost nothing can be done with it */
|
||||||
|
#define FMODE_PATH ((__force fmode_t)0x4000)
|
||||||
|
|
||||||
/* File was opened by fanotify and shouldn't generate fanotify events */
|
/* File was opened by fanotify and shouldn't generate fanotify events */
|
||||||
#define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
|
#define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue