Merge branch 'for-linus' of git://git.infradead.org/users/eparis/notify

* 'for-linus' of git://git.infradead.org/users/eparis/notify: (132 commits)
  fanotify: use both marks when possible
  fsnotify: pass both the vfsmount mark and inode mark
  fsnotify: walk the inode and vfsmount lists simultaneously
  fsnotify: rework ignored mark flushing
  fsnotify: remove global fsnotify groups lists
  fsnotify: remove group->mask
  fsnotify: remove the global masks
  fsnotify: cleanup should_send_event
  fanotify: use the mark in handler functions
  audit: use the mark in handler functions
  dnotify: use the mark in handler functions
  inotify: use the mark in handler functions
  fsnotify: send fsnotify_mark to groups in event handling functions
  fsnotify: Exchange list heads instead of moving elements
  fsnotify: srcu to protect read side of inode and vfsmount locks
  fsnotify: use an explicit flag to indicate fsnotify_destroy_mark has been called
  fsnotify: use _rcu functions for mark list traversal
  fsnotify: place marks on object in order of group memory address
  vfs/fsnotify: fsnotify_close can delay the final work in fput
  fsnotify: store struct file not struct path
  ...

Fix up trivial delete/modify conflict in fs/notify/inotify/inotify.c.
This commit is contained in:
Linus Torvalds 2010-08-10 11:39:13 -07:00
commit 8c8946f509
58 changed files with 3208 additions and 2389 deletions

View file

@ -360,14 +360,6 @@ When: 2.6.33
Why: Should be implemented in userspace, policy daemon.
Who: Johannes Berg <johannes@sipsolutions.net>
---------------------------
What: CONFIG_INOTIFY
When: 2.6.33
Why: last user (audit) will be converted to the newer more generic
and more easily maintained fsnotify subsystem
Who: Eric Paris <eparis@redhat.com>
----------------------------
What: sound-slot/service-* module aliases and related clutters in

View file

@ -842,4 +842,6 @@ ia32_sys_call_table:
.quad compat_sys_rt_tgsigqueueinfo /* 335 */
.quad sys_perf_event_open
.quad compat_sys_recvmmsg
.quad sys_fanotify_init
.quad sys32_fanotify_mark
ia32_syscall_end:

View file

@ -546,3 +546,12 @@ asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo,
return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
((u64)len_hi << 32) | len_lo);
}
asmlinkage long sys32_fanotify_mark(int fanotify_fd, unsigned int flags,
u32 mask_lo, u32 mask_hi,
int fd, const char __user *pathname)
{
return sys_fanotify_mark(fanotify_fd, flags,
((u64)mask_hi << 32) | mask_lo,
fd, pathname);
}

View file

@ -80,4 +80,7 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *);
/* ia32/ipc32.c */
asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32);
asmlinkage long sys32_fanotify_mark(int, unsigned int, u32, u32, int,
const char __user *);
#endif /* _ASM_X86_SYS_IA32_H */

View file

@ -343,10 +343,12 @@
#define __NR_rt_tgsigqueueinfo 335
#define __NR_perf_event_open 336
#define __NR_recvmmsg 337
#define __NR_fanotify_init 338
#define __NR_fanotify_mark 339
#ifdef __KERNEL__
#define NR_syscalls 338
#define NR_syscalls 340
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR

View file

@ -663,6 +663,10 @@ __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
#define __NR_recvmmsg 299
__SYSCALL(__NR_recvmmsg, sys_recvmmsg)
#define __NR_fanotify_init 300
__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
#define __NR_fanotify_mark 301
__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR

View file

@ -337,3 +337,5 @@ ENTRY(sys_call_table)
.long sys_rt_tgsigqueueinfo /* 335 */
.long sys_perf_event_open
.long sys_recvmmsg
.long sys_fanotify_init
.long sys_fanotify_mark

View file

@ -1193,11 +1193,10 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
if (iov != iovstack)
kfree(iov);
if ((ret + (type == READ)) > 0) {
struct dentry *dentry = file->f_path.dentry;
if (type == READ)
fsnotify_access(dentry);
fsnotify_access(file);
else
fsnotify_modify(dentry);
fsnotify_modify(file);
}
return ret;
}

View file

@ -128,7 +128,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
goto exit;
fsnotify_open(file->f_path.dentry);
fsnotify_open(file);
error = -ENOEXEC;
if(file->f_op) {
@ -683,7 +683,7 @@ struct file *open_exec(const char *name)
if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
goto exit;
fsnotify_open(file->f_path.dentry);
fsnotify_open(file);
err = deny_write_access(file);
if (err)

View file

@ -230,6 +230,15 @@ static void __fput(struct file *file)
might_sleep();
fsnotify_close(file);
/*
* fsnotify_create_event may have taken one or more references on this
* file. If it did so it left one reference for us to drop to make sure
* its calls to fput could not prematurely destroy the file.
*/
if (atomic_long_read(&file->f_count))
return fput(file);
/*
* The function eventpoll_release() should be the first called
* in the file cleanup chain.

View file

@ -20,7 +20,6 @@
#include <linux/pagemap.h>
#include <linux/cdev.h>
#include <linux/bootmem.h>
#include <linux/inotify.h>
#include <linux/fsnotify.h>
#include <linux/mount.h>
#include <linux/async.h>
@ -264,12 +263,8 @@ void inode_init_once(struct inode *inode)
INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
i_size_ordered_init(inode);
#ifdef CONFIG_INOTIFY
INIT_LIST_HEAD(&inode->inotify_watches);
mutex_init(&inode->inotify_mutex);
#endif
#ifdef CONFIG_FSNOTIFY
INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries);
INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
#endif
}
EXPORT_SYMBOL(inode_init_once);
@ -413,7 +408,6 @@ int invalidate_inodes(struct super_block *sb)
down_write(&iprune_sem);
spin_lock(&inode_lock);
inotify_unmount_inodes(&sb->s_inodes);
fsnotify_unmount_inodes(&sb->s_inodes);
busy = invalidate_list(&sb->s_inodes, &throw_away);
spin_unlock(&inode_lock);

View file

@ -2633,7 +2633,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
{
int error;
int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
const char *old_name;
const unsigned char *old_name;
if (old_dentry->d_inode == new_dentry->d_inode)
return 0;

View file

@ -29,6 +29,7 @@
#include <linux/log2.h>
#include <linux/idr.h>
#include <linux/fs_struct.h>
#include <linux/fsnotify.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include "pnode.h"
@ -150,6 +151,9 @@ struct vfsmount *alloc_vfsmnt(const char *name)
INIT_LIST_HEAD(&mnt->mnt_share);
INIT_LIST_HEAD(&mnt->mnt_slave_list);
INIT_LIST_HEAD(&mnt->mnt_slave);
#ifdef CONFIG_FSNOTIFY
INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
#endif
#ifdef CONFIG_SMP
mnt->mnt_writers = alloc_percpu(int);
if (!mnt->mnt_writers)
@ -610,6 +614,7 @@ static inline void __mntput(struct vfsmount *mnt)
* provides barriers, so count_mnt_writers() below is safe. AV
*/
WARN_ON(count_mnt_writers(mnt));
fsnotify_vfsmount_delete(mnt);
dput(mnt->mnt_root);
free_vfsmnt(mnt);
deactivate_super(sb);

View file

@ -934,7 +934,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
nfsdstats.io_read += host_err;
*count = host_err;
err = 0;
fsnotify_access(file->f_path.dentry);
fsnotify_access(file);
} else
err = nfserrno(host_err);
out:
@ -1045,7 +1045,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
goto out_nfserr;
*cnt = host_err;
nfsdstats.io_write += host_err;
fsnotify_modify(file->f_path.dentry);
fsnotify_modify(file);
/* clear setuid/setgid flag after write */
if (inode->i_mode & (S_ISUID | S_ISGID))

View file

@ -3,3 +3,4 @@ config FSNOTIFY
source "fs/notify/dnotify/Kconfig"
source "fs/notify/inotify/Kconfig"
source "fs/notify/fanotify/Kconfig"

View file

@ -1,4 +1,6 @@
obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o
obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o \
mark.o vfsmount_mark.o
obj-y += dnotify/
obj-y += inotify/
obj-y += fanotify/

View file

@ -29,17 +29,17 @@
int dir_notify_enable __read_mostly = 1;
static struct kmem_cache *dnotify_struct_cache __read_mostly;
static struct kmem_cache *dnotify_mark_entry_cache __read_mostly;
static struct kmem_cache *dnotify_mark_cache __read_mostly;
static struct fsnotify_group *dnotify_group __read_mostly;
static DEFINE_MUTEX(dnotify_mark_mutex);
/*
* dnotify will attach one of these to each inode (i_fsnotify_mark_entries) which
* dnotify will attach one of these to each inode (i_fsnotify_marks) which
* is being watched by dnotify. If multiple userspace applications are watching
* the same directory with dnotify their information is chained in dn
*/
struct dnotify_mark_entry {
struct fsnotify_mark_entry fsn_entry;
struct dnotify_mark {
struct fsnotify_mark fsn_mark;
struct dnotify_struct *dn;
};
@ -51,27 +51,27 @@ struct dnotify_mark_entry {
* it calls the fsnotify function so it can update the set of all events relevant
* to this inode.
*/
static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry)
static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
{
__u32 new_mask, old_mask;
struct dnotify_struct *dn;
struct dnotify_mark_entry *dnentry = container_of(entry,
struct dnotify_mark_entry,
fsn_entry);
struct dnotify_mark *dn_mark = container_of(fsn_mark,
struct dnotify_mark,
fsn_mark);
assert_spin_locked(&entry->lock);
assert_spin_locked(&fsn_mark->lock);
old_mask = entry->mask;
old_mask = fsn_mark->mask;
new_mask = 0;
for (dn = dnentry->dn; dn != NULL; dn = dn->dn_next)
for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next)
new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT);
entry->mask = new_mask;
fsnotify_set_mark_mask_locked(fsn_mark, new_mask);
if (old_mask == new_mask)
return;
if (entry->inode)
fsnotify_recalc_inode_mask(entry->inode);
if (fsn_mark->i.inode)
fsnotify_recalc_inode_mask(fsn_mark->i.inode);
}
/*
@ -83,29 +83,25 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry)
* events.
*/
static int dnotify_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
struct fsnotify_event *event)
{
struct fsnotify_mark_entry *entry = NULL;
struct dnotify_mark_entry *dnentry;
struct dnotify_mark *dn_mark;
struct inode *to_tell;
struct dnotify_struct *dn;
struct dnotify_struct **prev;
struct fown_struct *fown;
__u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD;
BUG_ON(vfsmount_mark);
to_tell = event->to_tell;
spin_lock(&to_tell->i_lock);
entry = fsnotify_find_mark_entry(group, to_tell);
spin_unlock(&to_tell->i_lock);
dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark);
/* unlikely since we alreay passed dnotify_should_send_event() */
if (unlikely(!entry))
return 0;
dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
spin_lock(&entry->lock);
prev = &dnentry->dn;
spin_lock(&inode_mark->lock);
prev = &dn_mark->dn;
while ((dn = *prev) != NULL) {
if ((dn->dn_mask & test_mask) == 0) {
prev = &dn->dn_next;
@ -118,12 +114,11 @@ static int dnotify_handle_event(struct fsnotify_group *group,
else {
*prev = dn->dn_next;
kmem_cache_free(dnotify_struct_cache, dn);
dnotify_recalc_inode_mask(entry);
dnotify_recalc_inode_mask(inode_mark);
}
}
spin_unlock(&entry->lock);
fsnotify_put_mark(entry);
spin_unlock(&inode_mark->lock);
return 0;
}
@ -133,44 +128,27 @@ static int dnotify_handle_event(struct fsnotify_group *group,
* userspace notification for that pair.
*/
static bool dnotify_should_send_event(struct fsnotify_group *group,
struct inode *inode, __u32 mask)
struct inode *inode,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
__u32 mask, void *data, int data_type)
{
struct fsnotify_mark_entry *entry;
bool send;
/* !dir_notify_enable should never get here, don't waste time checking
if (!dir_notify_enable)
return 0; */
/* not a dir, dnotify doesn't care */
if (!S_ISDIR(inode->i_mode))
return false;
spin_lock(&inode->i_lock);
entry = fsnotify_find_mark_entry(group, inode);
spin_unlock(&inode->i_lock);
/* no mark means no dnotify watch */
if (!entry)
return false;
mask = (mask & ~FS_EVENT_ON_CHILD);
send = (mask & entry->mask);
fsnotify_put_mark(entry); /* matches fsnotify_find_mark_entry */
return send;
return true;
}
static void dnotify_free_mark(struct fsnotify_mark_entry *entry)
static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
{
struct dnotify_mark_entry *dnentry = container_of(entry,
struct dnotify_mark_entry,
fsn_entry);
struct dnotify_mark *dn_mark = container_of(fsn_mark,
struct dnotify_mark,
fsn_mark);
BUG_ON(dnentry->dn);
BUG_ON(dn_mark->dn);
kmem_cache_free(dnotify_mark_entry_cache, dnentry);
kmem_cache_free(dnotify_mark_cache, dn_mark);
}
static struct fsnotify_ops dnotify_fsnotify_ops = {
@ -183,15 +161,15 @@ static struct fsnotify_ops dnotify_fsnotify_ops = {
/*
* Called every time a file is closed. Looks first for a dnotify mark on the
* inode. If one is found run all of the ->dn entries attached to that
* inode. If one is found run all of the ->dn structures attached to that
* mark for one relevant to this process closing the file and remove that
* dnotify_struct. If that was the last dnotify_struct also remove the
* fsnotify_mark_entry.
* fsnotify_mark.
*/
void dnotify_flush(struct file *filp, fl_owner_t id)
{
struct fsnotify_mark_entry *entry;
struct dnotify_mark_entry *dnentry;
struct fsnotify_mark *fsn_mark;
struct dnotify_mark *dn_mark;
struct dnotify_struct *dn;
struct dnotify_struct **prev;
struct inode *inode;
@ -200,38 +178,34 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
if (!S_ISDIR(inode->i_mode))
return;
spin_lock(&inode->i_lock);
entry = fsnotify_find_mark_entry(dnotify_group, inode);
spin_unlock(&inode->i_lock);
if (!entry)
fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode);
if (!fsn_mark)
return;
dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
mutex_lock(&dnotify_mark_mutex);
spin_lock(&entry->lock);
prev = &dnentry->dn;
spin_lock(&fsn_mark->lock);
prev = &dn_mark->dn;
while ((dn = *prev) != NULL) {
if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
*prev = dn->dn_next;
kmem_cache_free(dnotify_struct_cache, dn);
dnotify_recalc_inode_mask(entry);
dnotify_recalc_inode_mask(fsn_mark);
break;
}
prev = &dn->dn_next;
}
spin_unlock(&entry->lock);
spin_unlock(&fsn_mark->lock);
/* nothing else could have found us thanks to the dnotify_mark_mutex */
if (dnentry->dn == NULL)
fsnotify_destroy_mark_by_entry(entry);
fsnotify_recalc_group_mask(dnotify_group);
if (dn_mark->dn == NULL)
fsnotify_destroy_mark(fsn_mark);
mutex_unlock(&dnotify_mark_mutex);
fsnotify_put_mark(entry);
fsnotify_put_mark(fsn_mark);
}
/* this conversion is done only at watch creation */
@ -259,16 +233,16 @@ static __u32 convert_arg(unsigned long arg)
/*
* If multiple processes watch the same inode with dnotify there is only one
* dnotify mark in inode->i_fsnotify_mark_entries but we chain a dnotify_struct
* dnotify mark in inode->i_fsnotify_marks but we chain a dnotify_struct
* onto that mark. This function either attaches the new dnotify_struct onto
* that list, or it |= the mask onto an existing dnofiy_struct.
*/
static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnentry,
static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark *dn_mark,
fl_owner_t id, int fd, struct file *filp, __u32 mask)
{
struct dnotify_struct *odn;
odn = dnentry->dn;
odn = dn_mark->dn;
while (odn != NULL) {
/* adding more events to existing dnofiy_struct? */
if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
@ -283,8 +257,8 @@ static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnent
dn->dn_fd = fd;
dn->dn_filp = filp;
dn->dn_owner = id;
dn->dn_next = dnentry->dn;
dnentry->dn = dn;
dn->dn_next = dn_mark->dn;
dn_mark->dn = dn;
return 0;
}
@ -296,8 +270,8 @@ static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnent
*/
int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
{
struct dnotify_mark_entry *new_dnentry, *dnentry;
struct fsnotify_mark_entry *new_entry, *entry;
struct dnotify_mark *new_dn_mark, *dn_mark;
struct fsnotify_mark *new_fsn_mark, *fsn_mark;
struct dnotify_struct *dn;
struct inode *inode;
fl_owner_t id = current->files;
@ -306,7 +280,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
__u32 mask;
/* we use these to tell if we need to kfree */
new_entry = NULL;
new_fsn_mark = NULL;
dn = NULL;
if (!dir_notify_enable) {
@ -336,8 +310,8 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
}
/* new fsnotify mark, we expect most fcntl calls to add a new mark */
new_dnentry = kmem_cache_alloc(dnotify_mark_entry_cache, GFP_KERNEL);
if (!new_dnentry) {
new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL);
if (!new_dn_mark) {
error = -ENOMEM;
goto out_err;
}
@ -345,29 +319,27 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
/* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */
mask = convert_arg(arg);
/* set up the new_entry and new_dnentry */
new_entry = &new_dnentry->fsn_entry;
fsnotify_init_mark(new_entry, dnotify_free_mark);
new_entry->mask = mask;
new_dnentry->dn = NULL;
/* set up the new_fsn_mark and new_dn_mark */
new_fsn_mark = &new_dn_mark->fsn_mark;
fsnotify_init_mark(new_fsn_mark, dnotify_free_mark);
new_fsn_mark->mask = mask;
new_dn_mark->dn = NULL;
/* this is needed to prevent the fcntl/close race described below */
mutex_lock(&dnotify_mark_mutex);
/* add the new_entry or find an old one. */
spin_lock(&inode->i_lock);
entry = fsnotify_find_mark_entry(dnotify_group, inode);
spin_unlock(&inode->i_lock);
if (entry) {
dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
spin_lock(&entry->lock);
/* add the new_fsn_mark or find an old one. */
fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode);
if (fsn_mark) {
dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
spin_lock(&fsn_mark->lock);
} else {
fsnotify_add_mark(new_entry, dnotify_group, inode);
spin_lock(&new_entry->lock);
entry = new_entry;
dnentry = new_dnentry;
/* we used new_entry, so don't free it */
new_entry = NULL;
fsnotify_add_mark(new_fsn_mark, dnotify_group, inode, NULL, 0);
spin_lock(&new_fsn_mark->lock);
fsn_mark = new_fsn_mark;
dn_mark = new_dn_mark;
/* we used new_fsn_mark, so don't free it */
new_fsn_mark = NULL;
}
rcu_read_lock();
@ -376,17 +348,17 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
/* if (f != filp) means that we lost a race and another task/thread
* actually closed the fd we are still playing with before we grabbed
* the dnotify_mark_mutex and entry->lock. Since closing the fd is the
* only time we clean up the mark entries we need to get our mark off
* the dnotify_mark_mutex and fsn_mark->lock. Since closing the fd is the
* only time we clean up the marks we need to get our mark off
* the list. */
if (f != filp) {
/* if we added ourselves, shoot ourselves, it's possible that
* the flush actually did shoot this entry. That's fine too
* the flush actually did shoot this fsn_mark. That's fine too
* since multiple calls to destroy_mark is perfectly safe, if
* we found a dnentry already attached to the inode, just sod
* we found a dn_mark already attached to the inode, just sod
* off silently as the flush at close time dealt with it.
*/
if (dnentry == new_dnentry)
if (dn_mark == new_dn_mark)
destroy = 1;
goto out;
}
@ -394,13 +366,13 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
if (error) {
/* if we added, we must shoot */
if (dnentry == new_dnentry)
if (dn_mark == new_dn_mark)
destroy = 1;
goto out;
}
error = attach_dn(dn, dnentry, id, fd, filp, mask);
/* !error means that we attached the dn to the dnentry, so don't free it */
error = attach_dn(dn, dn_mark, id, fd, filp, mask);
/* !error means that we attached the dn to the dn_mark, so don't free it */
if (!error)
dn = NULL;
/* -EEXIST means that we didn't add this new dn and used an old one.
@ -408,20 +380,18 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
else if (error == -EEXIST)
error = 0;
dnotify_recalc_inode_mask(entry);
dnotify_recalc_inode_mask(fsn_mark);
out:
spin_unlock(&entry->lock);
spin_unlock(&fsn_mark->lock);
if (destroy)
fsnotify_destroy_mark_by_entry(entry);
fsnotify_recalc_group_mask(dnotify_group);
fsnotify_destroy_mark(fsn_mark);
mutex_unlock(&dnotify_mark_mutex);
fsnotify_put_mark(entry);
fsnotify_put_mark(fsn_mark);
out_err:
if (new_entry)
fsnotify_put_mark(new_entry);
if (new_fsn_mark)
fsnotify_put_mark(new_fsn_mark);
if (dn)
kmem_cache_free(dnotify_struct_cache, dn);
return error;
@ -430,10 +400,9 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
static int __init dnotify_init(void)
{
dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
dnotify_mark_entry_cache = KMEM_CACHE(dnotify_mark_entry, SLAB_PANIC);
dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC);
dnotify_group = fsnotify_obtain_group(DNOTIFY_GROUP_NUM,
0, &dnotify_fsnotify_ops);
dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
if (IS_ERR(dnotify_group))
panic("unable to allocate fsnotify group for dnotify\n");
return 0;

View file

@ -0,0 +1,26 @@
config FANOTIFY
bool "Filesystem wide access notification"
select FSNOTIFY
select ANON_INODES
default n
---help---
Say Y here to enable fanotify suport. fanotify is a file access
notification system which differs from inotify in that it sends
and open file descriptor to the userspace listener along with
the event.
If unsure, say Y.
config FANOTIFY_ACCESS_PERMISSIONS
bool "fanotify permissions checking"
depends on FANOTIFY
depends on SECURITY
default n
---help---
Say Y here is you want fanotify listeners to be able to make permissions
decisions concerning filesystem events. This is used by some fanotify
listeners which need to scan files before allowing the system access to
use those files. This is used by some anti-malware vendors and by some
hierarchical storage managent systems.
If unsure, say N.

View file

@ -0,0 +1 @@
obj-$(CONFIG_FANOTIFY) += fanotify.o fanotify_user.o

View file

@ -0,0 +1,212 @@
#include <linux/fanotify.h>
#include <linux/fdtable.h>
#include <linux/fsnotify_backend.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/kernel.h> /* UINT_MAX */
#include <linux/mount.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/wait.h>
static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
{
pr_debug("%s: old=%p new=%p\n", __func__, old, new);
if (old->to_tell == new->to_tell &&
old->data_type == new->data_type &&
old->tgid == new->tgid) {
switch (old->data_type) {
case (FSNOTIFY_EVENT_FILE):
if ((old->file->f_path.mnt == new->file->f_path.mnt) &&
(old->file->f_path.dentry == new->file->f_path.dentry))
return true;
case (FSNOTIFY_EVENT_NONE):
return true;
default:
BUG();
};
}
return false;
}
/* and the list better be locked by something too! */
static struct fsnotify_event *fanotify_merge(struct list_head *list,
struct fsnotify_event *event)
{
struct fsnotify_event_holder *test_holder;
struct fsnotify_event *test_event = NULL;
struct fsnotify_event *new_event;
pr_debug("%s: list=%p event=%p\n", __func__, list, event);
list_for_each_entry_reverse(test_holder, list, event_list) {
if (should_merge(test_holder->event, event)) {
test_event = test_holder->event;
break;
}
}
if (!test_event)
return NULL;
fsnotify_get_event(test_event);
/* if they are exactly the same we are done */
if (test_event->mask == event->mask)
return test_event;
/*
* if the refcnt == 2 this is the only queue
* for this event and so we can update the mask
* in place.
*/
if (atomic_read(&test_event->refcnt) == 2) {
test_event->mask |= event->mask;
return test_event;
}
new_event = fsnotify_clone_event(test_event);
/* done with test_event */
fsnotify_put_event(test_event);
/* couldn't allocate memory, merge was not possible */
if (unlikely(!new_event))
return ERR_PTR(-ENOMEM);
/* build new event and replace it on the list */
new_event->mask = (test_event->mask | event->mask);
fsnotify_replace_event(test_holder, new_event);
/* we hold a reference on new_event from clone_event */
return new_event;
}
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
static int fanotify_get_response_from_access(struct fsnotify_group *group,
struct fsnotify_event *event)
{
int ret;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
wait_event(group->fanotify_data.access_waitq, event->response);
/* userspace responded, convert to something usable */
spin_lock(&event->lock);
switch (event->response) {
case FAN_ALLOW:
ret = 0;
break;
case FAN_DENY:
default:
ret = -EPERM;
}
event->response = 0;
spin_unlock(&event->lock);
pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
group, event, ret);
return ret;
}
#endif
static int fanotify_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *fanotify_mark,
struct fsnotify_event *event)
{
int ret = 0;
struct fsnotify_event *notify_event = NULL;
BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
notify_event = fsnotify_add_notify_event(group, event, NULL, fanotify_merge);
if (IS_ERR(notify_event))
return PTR_ERR(notify_event);
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
if (event->mask & FAN_ALL_PERM_EVENTS) {
/* if we merged we need to wait on the new event */
if (notify_event)
event = notify_event;
ret = fanotify_get_response_from_access(group, event);
}
#endif
if (notify_event)
fsnotify_put_event(notify_event);
return ret;
}
static bool fanotify_should_send_event(struct fsnotify_group *group,
struct inode *to_tell,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmnt_mark,
__u32 event_mask, void *data, int data_type)
{
__u32 marks_mask, marks_ignored_mask;
pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p "
"mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
inode_mark, vfsmnt_mark, event_mask, data, data_type);
pr_debug("%s: group=%p vfsmount_mark=%p inode_mark=%p mask=%x\n",
__func__, group, vfsmnt_mark, inode_mark, event_mask);
/* sorry, fanotify only gives a damn about files and dirs */
if (!S_ISREG(to_tell->i_mode) &&
!S_ISDIR(to_tell->i_mode))
return false;
/* if we don't have enough info to send an event to userspace say no */
if (data_type != FSNOTIFY_EVENT_FILE)
return false;
if (inode_mark && vfsmnt_mark) {
marks_mask = (vfsmnt_mark->mask | inode_mark->mask);
marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask);
} else if (inode_mark) {
/*
* if the event is for a child and this inode doesn't care about
* events on the child, don't send it!
*/
if ((event_mask & FS_EVENT_ON_CHILD) &&
!(inode_mark->mask & FS_EVENT_ON_CHILD))
return false;
marks_mask = inode_mark->mask;
marks_ignored_mask = inode_mark->ignored_mask;
} else if (vfsmnt_mark) {
marks_mask = vfsmnt_mark->mask;
marks_ignored_mask = vfsmnt_mark->ignored_mask;
} else {
BUG();
}
if (event_mask & marks_mask & ~marks_ignored_mask)
return true;
return false;
}
const struct fsnotify_ops fanotify_fsnotify_ops = {
.handle_event = fanotify_handle_event,
.should_send_event = fanotify_should_send_event,
.free_group_priv = NULL,
.free_event_priv = NULL,
.freeing_mark = NULL,
};

View file

@ -0,0 +1,760 @@
#include <linux/fanotify.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/fsnotify_backend.h>
#include <linux/init.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/poll.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
extern const struct fsnotify_ops fanotify_fsnotify_ops;
static struct kmem_cache *fanotify_mark_cache __read_mostly;
static struct kmem_cache *fanotify_response_event_cache __read_mostly;
struct fanotify_response_event {
struct list_head list;
__s32 fd;
struct fsnotify_event *event;
};
/*
* Get an fsnotify notification event if one exists and is small
* enough to fit in "count". Return an error pointer if the count
* is not large enough.
*
* Called with the group->notification_mutex held.
*/
static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
size_t count)
{
BUG_ON(!mutex_is_locked(&group->notification_mutex));
pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
if (fsnotify_notify_queue_is_empty(group))
return NULL;
if (FAN_EVENT_METADATA_LEN > count)
return ERR_PTR(-EINVAL);
/* held the notification_mutex the whole time, so this is the
* same event we peeked above */
return fsnotify_remove_notify_event(group);
}
static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
{
int client_fd;
struct dentry *dentry;
struct vfsmount *mnt;
struct file *new_file;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
client_fd = get_unused_fd();
if (client_fd < 0)
return client_fd;
if (event->data_type != FSNOTIFY_EVENT_FILE) {
WARN_ON(1);
put_unused_fd(client_fd);
return -EINVAL;
}
/*
* we need a new file handle for the userspace program so it can read even if it was
* originally opened O_WRONLY.
*/
dentry = dget(event->file->f_path.dentry);
mnt = mntget(event->file->f_path.mnt);
/* it's possible this event was an overflow event. in that case dentry and mnt
* are NULL; That's fine, just don't call dentry open */
if (dentry && mnt)
new_file = dentry_open(dentry, mnt,
group->fanotify_data.f_flags | FMODE_NONOTIFY,
current_cred());
else
new_file = ERR_PTR(-EOVERFLOW);
if (IS_ERR(new_file)) {
/*
* we still send an event even if we can't open the file. this
* can happen when say tasks are gone and we try to open their
* /proc files or we try to open a WRONLY file like in sysfs
* we just send the errno to userspace since there isn't much
* else we can do.
*/
put_unused_fd(client_fd);
client_fd = PTR_ERR(new_file);
} else {
fd_install(client_fd, new_file);
}
return client_fd;
}
static ssize_t fill_event_metadata(struct fsnotify_group *group,
struct fanotify_event_metadata *metadata,
struct fsnotify_event *event)
{
pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
group, metadata, event);
metadata->event_len = FAN_EVENT_METADATA_LEN;
metadata->vers = FANOTIFY_METADATA_VERSION;
metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS;
metadata->pid = pid_vnr(event->tgid);
metadata->fd = create_fd(group, event);
return metadata->fd;
}
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
static struct fanotify_response_event *dequeue_re(struct fsnotify_group *group,
__s32 fd)
{
struct fanotify_response_event *re, *return_re = NULL;
mutex_lock(&group->fanotify_data.access_mutex);
list_for_each_entry(re, &group->fanotify_data.access_list, list) {
if (re->fd != fd)
continue;
list_del_init(&re->list);
return_re = re;
break;
}
mutex_unlock(&group->fanotify_data.access_mutex);
pr_debug("%s: found return_re=%p\n", __func__, return_re);
return return_re;
}
static int process_access_response(struct fsnotify_group *group,
struct fanotify_response *response_struct)
{
struct fanotify_response_event *re;
__s32 fd = response_struct->fd;
__u32 response = response_struct->response;
pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group,
fd, response);
/*
* make sure the response is valid, if invalid we do nothing and either
* userspace can send a valid responce or we will clean it up after the
* timeout
*/
switch (response) {
case FAN_ALLOW:
case FAN_DENY:
break;
default:
return -EINVAL;
}
if (fd < 0)
return -EINVAL;
re = dequeue_re(group, fd);
if (!re)
return -ENOENT;
re->event->response = response;
wake_up(&group->fanotify_data.access_waitq);
kmem_cache_free(fanotify_response_event_cache, re);
return 0;
}
static int prepare_for_access_response(struct fsnotify_group *group,
struct fsnotify_event *event,
__s32 fd)
{
struct fanotify_response_event *re;
if (!(event->mask & FAN_ALL_PERM_EVENTS))
return 0;
re = kmem_cache_alloc(fanotify_response_event_cache, GFP_KERNEL);
if (!re)
return -ENOMEM;
re->event = event;
re->fd = fd;
mutex_lock(&group->fanotify_data.access_mutex);
list_add_tail(&re->list, &group->fanotify_data.access_list);
mutex_unlock(&group->fanotify_data.access_mutex);
return 0;
}
static void remove_access_response(struct fsnotify_group *group,
struct fsnotify_event *event,
__s32 fd)
{
struct fanotify_response_event *re;
if (!(event->mask & FAN_ALL_PERM_EVENTS))
return;
re = dequeue_re(group, fd);
if (!re)
return;
BUG_ON(re->event != event);
kmem_cache_free(fanotify_response_event_cache, re);
return;
}
#else
static int prepare_for_access_response(struct fsnotify_group *group,
struct fsnotify_event *event,
__s32 fd)
{
return 0;
}
static void remove_access_response(struct fsnotify_group *group,
struct fsnotify_event *event,
__s32 fd)
{
return;
}
#endif
static ssize_t copy_event_to_user(struct fsnotify_group *group,
struct fsnotify_event *event,
char __user *buf)
{
struct fanotify_event_metadata fanotify_event_metadata;
int fd, ret;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
fd = fill_event_metadata(group, &fanotify_event_metadata, event);
if (fd < 0)
return fd;
ret = prepare_for_access_response(group, event, fd);
if (ret)
goto out_close_fd;
ret = -EFAULT;
if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN))
goto out_kill_access_response;
return FAN_EVENT_METADATA_LEN;
out_kill_access_response:
remove_access_response(group, event, fd);
out_close_fd:
sys_close(fd);
return ret;
}
/* intofiy userspace file descriptor functions */
static unsigned int fanotify_poll(struct file *file, poll_table *wait)
{
struct fsnotify_group *group = file->private_data;
int ret = 0;
poll_wait(file, &group->notification_waitq, wait);
mutex_lock(&group->notification_mutex);
if (!fsnotify_notify_queue_is_empty(group))
ret = POLLIN | POLLRDNORM;
mutex_unlock(&group->notification_mutex);
return ret;
}
static ssize_t fanotify_read(struct file *file, char __user *buf,
size_t count, loff_t *pos)
{
struct fsnotify_group *group;
struct fsnotify_event *kevent;
char __user *start;
int ret;
DEFINE_WAIT(wait);
start = buf;
group = file->private_data;
pr_debug("%s: group=%p\n", __func__, group);
while (1) {
prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
mutex_lock(&group->notification_mutex);
kevent = get_one_event(group, count);
mutex_unlock(&group->notification_mutex);
if (kevent) {
ret = PTR_ERR(kevent);
if (IS_ERR(kevent))
break;
ret = copy_event_to_user(group, kevent, buf);
fsnotify_put_event(kevent);
if (ret < 0)
break;
buf += ret;
count -= ret;
continue;
}
ret = -EAGAIN;
if (file->f_flags & O_NONBLOCK)
break;
ret = -EINTR;
if (signal_pending(current))
break;
if (start != buf)
break;
schedule();
}
finish_wait(&group->notification_waitq, &wait);
if (start != buf && ret != -EFAULT)
ret = buf - start;
return ret;
}
static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
struct fanotify_response response = { .fd = -1, .response = -1 };
struct fsnotify_group *group;
int ret;
group = file->private_data;
if (count > sizeof(response))
count = sizeof(response);
pr_debug("%s: group=%p count=%zu\n", __func__, group, count);
if (copy_from_user(&response, buf, count))
return -EFAULT;
ret = process_access_response(group, &response);
if (ret < 0)
count = ret;
return count;
#else
return -EINVAL;
#endif
}
static int fanotify_release(struct inode *ignored, struct file *file)
{
struct fsnotify_group *group = file->private_data;
pr_debug("%s: file=%p group=%p\n", __func__, file, group);
/* matches the fanotify_init->fsnotify_alloc_group */
fsnotify_put_group(group);
return 0;
}
static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct fsnotify_group *group;
struct fsnotify_event_holder *holder;
void __user *p;
int ret = -ENOTTY;
size_t send_len = 0;
group = file->private_data;
p = (void __user *) arg;
switch (cmd) {
case FIONREAD:
mutex_lock(&group->notification_mutex);
list_for_each_entry(holder, &group->notification_list, event_list)
send_len += FAN_EVENT_METADATA_LEN;
mutex_unlock(&group->notification_mutex);
ret = put_user(send_len, (int __user *) p);
break;
}
return ret;
}
static const struct file_operations fanotify_fops = {
.poll = fanotify_poll,
.read = fanotify_read,
.write = fanotify_write,
.fasync = NULL,
.release = fanotify_release,
.unlocked_ioctl = fanotify_ioctl,
.compat_ioctl = fanotify_ioctl,
};
static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
{
kmem_cache_free(fanotify_mark_cache, fsn_mark);
}
static int fanotify_find_path(int dfd, const char __user *filename,
struct path *path, unsigned int flags)
{
int ret;
pr_debug("%s: dfd=%d filename=%p flags=%x\n", __func__,
dfd, filename, flags);
if (filename == NULL) {
struct file *file;
int fput_needed;
ret = -EBADF;
file = fget_light(dfd, &fput_needed);
if (!file)
goto out;
ret = -ENOTDIR;
if ((flags & FAN_MARK_ONLYDIR) &&
!(S_ISDIR(file->f_path.dentry->d_inode->i_mode))) {
fput_light(file, fput_needed);
goto out;
}
*path = file->f_path;
path_get(path);
fput_light(file, fput_needed);
} else {
unsigned int lookup_flags = 0;
if (!(flags & FAN_MARK_DONT_FOLLOW))
lookup_flags |= LOOKUP_FOLLOW;
if (flags & FAN_MARK_ONLYDIR)
lookup_flags |= LOOKUP_DIRECTORY;
ret = user_path_at(dfd, filename, lookup_flags, path);
if (ret)
goto out;
}
/* you can only watch an inode if you have read permissions on it */
ret = inode_permission(path->dentry->d_inode, MAY_READ);
if (ret)
path_put(path);
out:
return ret;
}
static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
__u32 mask,
unsigned int flags)
{
__u32 oldmask;
spin_lock(&fsn_mark->lock);
if (!(flags & FAN_MARK_IGNORED_MASK)) {
oldmask = fsn_mark->mask;
fsnotify_set_mark_mask_locked(fsn_mark, (oldmask & ~mask));
} else {
oldmask = fsn_mark->ignored_mask;
fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask & ~mask));
}
spin_unlock(&fsn_mark->lock);
if (!(oldmask & ~mask))
fsnotify_destroy_mark(fsn_mark);
return mask & oldmask;
}
static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
struct vfsmount *mnt, __u32 mask,
unsigned int flags)
{
struct fsnotify_mark *fsn_mark = NULL;
__u32 removed;
fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
if (!fsn_mark)
return -ENOENT;
removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags);
fsnotify_put_mark(fsn_mark);
if (removed & mnt->mnt_fsnotify_mask)
fsnotify_recalc_vfsmount_mask(mnt);
return 0;
}
static int fanotify_remove_inode_mark(struct fsnotify_group *group,
struct inode *inode, __u32 mask,
unsigned int flags)
{
struct fsnotify_mark *fsn_mark = NULL;
__u32 removed;
fsn_mark = fsnotify_find_inode_mark(group, inode);
if (!fsn_mark)
return -ENOENT;
removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags);
/* matches the fsnotify_find_inode_mark() */
fsnotify_put_mark(fsn_mark);
if (removed & inode->i_fsnotify_mask)
fsnotify_recalc_inode_mask(inode);
return 0;
}
static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
__u32 mask,
unsigned int flags)
{
__u32 oldmask;
spin_lock(&fsn_mark->lock);
if (!(flags & FAN_MARK_IGNORED_MASK)) {
oldmask = fsn_mark->mask;
fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask));
} else {
oldmask = fsn_mark->ignored_mask;
fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask | mask));
if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
}
spin_unlock(&fsn_mark->lock);
return mask & ~oldmask;
}
static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
struct vfsmount *mnt, __u32 mask,
unsigned int flags)
{
struct fsnotify_mark *fsn_mark;
__u32 added;
fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
if (!fsn_mark) {
int ret;
fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
if (!fsn_mark)
return -ENOMEM;
fsnotify_init_mark(fsn_mark, fanotify_free_mark);
ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0);
if (ret) {
fanotify_free_mark(fsn_mark);
return ret;
}
}
added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
fsnotify_put_mark(fsn_mark);
if (added & ~mnt->mnt_fsnotify_mask)
fsnotify_recalc_vfsmount_mask(mnt);
return 0;
}
static int fanotify_add_inode_mark(struct fsnotify_group *group,
struct inode *inode, __u32 mask,
unsigned int flags)
{
struct fsnotify_mark *fsn_mark;
__u32 added;
pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
fsn_mark = fsnotify_find_inode_mark(group, inode);
if (!fsn_mark) {
int ret;
fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
if (!fsn_mark)
return -ENOMEM;
fsnotify_init_mark(fsn_mark, fanotify_free_mark);
ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0);
if (ret) {
fanotify_free_mark(fsn_mark);
return ret;
}
}
added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
fsnotify_put_mark(fsn_mark);
if (added & ~inode->i_fsnotify_mask)
fsnotify_recalc_inode_mask(inode);
return 0;
}
/* fanotify syscalls */
SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
{
struct fsnotify_group *group;
int f_flags, fd;
pr_debug("%s: flags=%d event_f_flags=%d\n",
__func__, flags, event_f_flags);
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (flags & ~FAN_ALL_INIT_FLAGS)
return -EINVAL;
f_flags = O_RDWR | FMODE_NONOTIFY;
if (flags & FAN_CLOEXEC)
f_flags |= O_CLOEXEC;
if (flags & FAN_NONBLOCK)
f_flags |= O_NONBLOCK;
/* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
if (IS_ERR(group))
return PTR_ERR(group);
group->fanotify_data.f_flags = event_f_flags;
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
mutex_init(&group->fanotify_data.access_mutex);
init_waitqueue_head(&group->fanotify_data.access_waitq);
INIT_LIST_HEAD(&group->fanotify_data.access_list);
#endif
fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
if (fd < 0)
goto out_put_group;
return fd;
out_put_group:
fsnotify_put_group(group);
return fd;
}
SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
__u64 mask, int dfd,
const char __user * pathname)
{
struct inode *inode = NULL;
struct vfsmount *mnt = NULL;
struct fsnotify_group *group;
struct file *filp;
struct path path;
int ret, fput_needed;
pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
__func__, fanotify_fd, flags, dfd, pathname, mask);
/* we only use the lower 32 bits as of right now. */
if (mask & ((__u64)0xffffffff << 32))
return -EINVAL;
if (flags & ~FAN_ALL_MARK_FLAGS)
return -EINVAL;
switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
case FAN_MARK_ADD:
case FAN_MARK_REMOVE:
case FAN_MARK_FLUSH:
break;
default:
return -EINVAL;
}
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD))
#else
if (mask & ~(FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD))
#endif
return -EINVAL;
filp = fget_light(fanotify_fd, &fput_needed);
if (unlikely(!filp))
return -EBADF;
/* verify that this is indeed an fanotify instance */
ret = -EINVAL;
if (unlikely(filp->f_op != &fanotify_fops))
goto fput_and_out;
ret = fanotify_find_path(dfd, pathname, &path, flags);
if (ret)
goto fput_and_out;
/* inode held in place by reference to path; group by fget on fd */
if (!(flags & FAN_MARK_MOUNT))
inode = path.dentry->d_inode;
else
mnt = path.mnt;
group = filp->private_data;
/* create/update an inode mark */
switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
case FAN_MARK_ADD:
if (flags & FAN_MARK_MOUNT)
ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags);
else
ret = fanotify_add_inode_mark(group, inode, mask, flags);
break;
case FAN_MARK_REMOVE:
if (flags & FAN_MARK_MOUNT)
ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags);
else
ret = fanotify_remove_inode_mark(group, inode, mask, flags);
break;
case FAN_MARK_FLUSH:
if (flags & FAN_MARK_MOUNT)
fsnotify_clear_vfsmount_marks_by_group(group);
else
fsnotify_clear_inode_marks_by_group(group);
break;
default:
ret = -EINVAL;
}
path_put(&path);
fput_and_out:
fput_light(filp, fput_needed);
return ret;
}
#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
asmlinkage long SyS_fanotify_mark(long fanotify_fd, long flags, __u64 mask,
long dfd, long pathname)
{
return SYSC_fanotify_mark((int) fanotify_fd, (unsigned int) flags,
mask, (int) dfd,
(const char __user *) pathname);
}
SYSCALL_ALIAS(sys_fanotify_mark, SyS_fanotify_mark);
#endif
/*
* fanotify_user_setup - Our initialization function. Note that we cannnot return
* error because we have compiled-in VFS hooks. So an (unlikely) failure here
* must result in panic().
*/
static int __init fanotify_user_setup(void)
{
fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event,
SLAB_PANIC);
return 0;
}
device_initcall(fanotify_user_setup);

View file

@ -21,6 +21,7 @@
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/srcu.h>
#include <linux/fsnotify_backend.h>
@ -35,6 +36,11 @@ void __fsnotify_inode_delete(struct inode *inode)
}
EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);
void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
{
fsnotify_clear_marks_by_mount(mnt);
}
/*
* Given an inode, first check if we care what happens to our children. Inotify
* and dnotify both tell their parents about events. If we care about any event
@ -78,13 +84,16 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
}
/* Notify this dentry's parent about a child's events. */
void __fsnotify_parent(struct dentry *dentry, __u32 mask)
void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask)
{
struct dentry *parent;
struct inode *p_inode;
bool send = false;
bool should_update_children = false;
if (!dentry)
dentry = file->f_path.dentry;
if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
return;
@ -115,8 +124,12 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask)
* specifies these are events which came from a child. */
mask |= FS_EVENT_ON_CHILD;
fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
dentry->d_name.name, 0);
if (file)
fsnotify(p_inode, mask, file, FSNOTIFY_EVENT_FILE,
dentry->d_name.name, 0);
else
fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
dentry->d_name.name, 0);
dput(parent);
}
@ -127,63 +140,181 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask)
}
EXPORT_SYMBOL_GPL(__fsnotify_parent);
static int send_to_group(struct inode *to_tell, struct vfsmount *mnt,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
__u32 mask, void *data,
int data_is, u32 cookie,
const unsigned char *file_name,
struct fsnotify_event **event)
{
struct fsnotify_group *group = inode_mark->group;
__u32 inode_test_mask = (mask & ~FS_EVENT_ON_CHILD);
__u32 vfsmount_test_mask = (mask & ~FS_EVENT_ON_CHILD);
pr_debug("%s: group=%p to_tell=%p mnt=%p mark=%p mask=%x data=%p"
" data_is=%d cookie=%d event=%p\n", __func__, group, to_tell,
mnt, inode_mark, mask, data, data_is, cookie, *event);
/* clear ignored on inode modification */
if (mask & FS_MODIFY) {
if (inode_mark &&
!(inode_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
inode_mark->ignored_mask = 0;
if (vfsmount_mark &&
!(vfsmount_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
vfsmount_mark->ignored_mask = 0;
}
/* does the inode mark tell us to do something? */
if (inode_mark) {
inode_test_mask &= inode_mark->mask;
inode_test_mask &= ~inode_mark->ignored_mask;
}
/* does the vfsmount_mark tell us to do something? */
if (vfsmount_mark) {
vfsmount_test_mask &= vfsmount_mark->mask;
vfsmount_test_mask &= ~vfsmount_mark->ignored_mask;
if (inode_mark)
vfsmount_test_mask &= ~inode_mark->ignored_mask;
}
if (!inode_test_mask && !vfsmount_test_mask)
return 0;
if (group->ops->should_send_event(group, to_tell, inode_mark,
vfsmount_mark, mask, data,
data_is) == false)
return 0;
if (!*event) {
*event = fsnotify_create_event(to_tell, mask, data,
data_is, file_name,
cookie, GFP_KERNEL);
if (!*event)
return -ENOMEM;
}
return group->ops->handle_event(group, inode_mark, vfsmount_mark, *event);
}
/*
* This is the main call to fsnotify. The VFS calls into hook specific functions
* in linux/fsnotify.h. Those functions then in turn call here. Here will call
* out to all of the registered fsnotify_group. Those groups can then use the
* notification event in whatever means they feel necessary.
*/
void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name, u32 cookie)
int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
const unsigned char *file_name, u32 cookie)
{
struct fsnotify_group *group;
struct hlist_node *inode_node, *vfsmount_node;
struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
struct fsnotify_group *inode_group, *vfsmount_group;
struct fsnotify_event *event = NULL;
int idx;
struct vfsmount *mnt;
int idx, ret = 0;
bool used_inode = false, used_vfsmount = false;
/* global tests shouldn't care about events on child only the specific event */
__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
if (list_empty(&fsnotify_groups))
return;
if (data_is == FSNOTIFY_EVENT_FILE)
mnt = ((struct file *)data)->f_path.mnt;
else
mnt = NULL;
if (!(test_mask & fsnotify_mask))
return;
if (!(test_mask & to_tell->i_fsnotify_mask))
return;
/*
* SRCU!! the groups list is very very much read only and the path is
* very hot. The VAST majority of events are not going to need to do
* anything other than walk the list so it's crazy to pre-allocate.
* if this is a modify event we may need to clear the ignored masks
* otherwise return if neither the inode nor the vfsmount care about
* this type of event.
*/
idx = srcu_read_lock(&fsnotify_grp_srcu);
list_for_each_entry_rcu(group, &fsnotify_groups, group_list) {
if (test_mask & group->mask) {
if (!group->ops->should_send_event(group, to_tell, mask))
continue;
if (!event) {
event = fsnotify_create_event(to_tell, mask, data,
data_is, file_name, cookie,
GFP_KERNEL);
/* shit, we OOM'd and now we can't tell, maybe
* someday someone else will want to do something
* here */
if (!event)
break;
}
group->ops->handle_event(group, event);
}
if (!(mask & FS_MODIFY) &&
!(test_mask & to_tell->i_fsnotify_mask) &&
!(mnt && test_mask & mnt->mnt_fsnotify_mask))
return 0;
idx = srcu_read_lock(&fsnotify_mark_srcu);
if ((mask & FS_MODIFY) ||
(test_mask & to_tell->i_fsnotify_mask))
inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first,
&fsnotify_mark_srcu);
else
inode_node = NULL;
if (mnt) {
if ((mask & FS_MODIFY) ||
(test_mask & mnt->mnt_fsnotify_mask))
vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first,
&fsnotify_mark_srcu);
else
vfsmount_node = NULL;
} else {
mnt = NULL;
vfsmount_node = NULL;
}
srcu_read_unlock(&fsnotify_grp_srcu, idx);
while (inode_node || vfsmount_node) {
if (inode_node) {
inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
struct fsnotify_mark, i.i_list);
inode_group = inode_mark->group;
} else
inode_group = (void *)-1;
if (vfsmount_node) {
vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu),
struct fsnotify_mark, m.m_list);
vfsmount_group = vfsmount_mark->group;
} else
vfsmount_group = (void *)-1;
if (inode_group < vfsmount_group) {
/* handle inode */
send_to_group(to_tell, NULL, inode_mark, NULL, mask, data,
data_is, cookie, file_name, &event);
used_inode = true;
} else if (vfsmount_group < inode_group) {
send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data,
data_is, cookie, file_name, &event);
used_vfsmount = true;
} else {
send_to_group(to_tell, mnt, inode_mark, vfsmount_mark,
mask, data, data_is, cookie, file_name,
&event);
used_vfsmount = true;
used_inode = true;
}
if (used_inode)
inode_node = srcu_dereference(inode_node->next,
&fsnotify_mark_srcu);
if (used_vfsmount)
vfsmount_node = srcu_dereference(vfsmount_node->next,
&fsnotify_mark_srcu);
}
srcu_read_unlock(&fsnotify_mark_srcu, idx);
/*
* fsnotify_create_event() took a reference so the event can't be cleaned
* up while we are still trying to add it to lists, drop that one.
*/
if (event)
fsnotify_put_event(event);
return ret;
}
EXPORT_SYMBOL_GPL(fsnotify);
static __init int fsnotify_init(void)
{
return init_srcu_struct(&fsnotify_grp_srcu);
int ret;
BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23);
ret = init_srcu_struct(&fsnotify_mark_srcu);
if (ret)
panic("initializing fsnotify_mark_srcu");
return 0;
}
subsys_initcall(fsnotify_init);
core_initcall(fsnotify_init);

View file

@ -6,21 +6,34 @@
#include <linux/srcu.h>
#include <linux/types.h>
/* protects reads of fsnotify_groups */
extern struct srcu_struct fsnotify_grp_srcu;
/* all groups which receive fsnotify events */
extern struct list_head fsnotify_groups;
/* all bitwise OR of all event types (FS_*) for all fsnotify_groups */
extern __u32 fsnotify_mask;
/* destroy all events sitting in this groups notification queue */
extern void fsnotify_flush_notify(struct fsnotify_group *group);
/* protects reads of inode and vfsmount marks list */
extern struct srcu_struct fsnotify_mark_srcu;
extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
__u32 mask);
/* add a mark to an inode */
extern int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group, struct inode *inode,
int allow_dups);
/* add a mark to a vfsmount */
extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group, struct vfsmount *mnt,
int allow_dups);
/* final kfree of a group */
extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
/* vfsmount specific destruction of a mark */
extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark);
/* inode specific destruction of a mark */
extern void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark);
/* run the list of all marks associated with inode and flag them to be freed */
extern void fsnotify_clear_marks_by_inode(struct inode *inode);
/* run the list of all marks associated with vfsmount and flag them to be freed */
extern void fsnotify_clear_marks_by_mount(struct vfsmount *mnt);
/*
* update the dentry->d_flags of all of inode's children to indicate if inode cares
* about events that happen to its children.

View file

@ -28,64 +28,6 @@
#include <asm/atomic.h>
/* protects writes to fsnotify_groups and fsnotify_mask */
static DEFINE_MUTEX(fsnotify_grp_mutex);
/* protects reads while running the fsnotify_groups list */
struct srcu_struct fsnotify_grp_srcu;
/* all groups registered to receive filesystem notifications */
LIST_HEAD(fsnotify_groups);
/* bitwise OR of all events (FS_*) interesting to some group on this system */
__u32 fsnotify_mask;
/*
* When a new group registers or changes it's set of interesting events
* this function updates the fsnotify_mask to contain all interesting events
*/
void fsnotify_recalc_global_mask(void)
{
struct fsnotify_group *group;
__u32 mask = 0;
int idx;
idx = srcu_read_lock(&fsnotify_grp_srcu);
list_for_each_entry_rcu(group, &fsnotify_groups, group_list)
mask |= group->mask;
srcu_read_unlock(&fsnotify_grp_srcu, idx);
fsnotify_mask = mask;
}
/*
* Update the group->mask by running all of the marks associated with this
* group and finding the bitwise | of all of the mark->mask. If we change
* the group->mask we need to update the global mask of events interesting
* to the system.
*/
void fsnotify_recalc_group_mask(struct fsnotify_group *group)
{
__u32 mask = 0;
__u32 old_mask = group->mask;
struct fsnotify_mark_entry *entry;
spin_lock(&group->mark_lock);
list_for_each_entry(entry, &group->mark_entries, g_list)
mask |= entry->mask;
spin_unlock(&group->mark_lock);
group->mask = mask;
if (old_mask != mask)
fsnotify_recalc_global_mask();
}
/*
* Take a reference to a group so things found under the fsnotify_grp_mutex
* can't get freed under us
*/
static void fsnotify_get_group(struct fsnotify_group *group)
{
atomic_inc(&group->refcnt);
}
/*
* Final freeing of a group
*/
@ -110,145 +52,53 @@ void fsnotify_final_destroy_group(struct fsnotify_group *group)
*/
static void fsnotify_destroy_group(struct fsnotify_group *group)
{
/* clear all inode mark entries for this group */
/* clear all inode marks for this group */
fsnotify_clear_marks_by_group(group);
synchronize_srcu(&fsnotify_mark_srcu);
/* past the point of no return, matches the initial value of 1 */
if (atomic_dec_and_test(&group->num_marks))
fsnotify_final_destroy_group(group);
}
/*
* Remove this group from the global list of groups that will get events
* this can be done even if there are still references and things still using
* this group. This just stops the group from getting new events.
*/
static void __fsnotify_evict_group(struct fsnotify_group *group)
{
BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
if (group->on_group_list)
list_del_rcu(&group->group_list);
group->on_group_list = 0;
}
/*
* Called when a group is no longer interested in getting events. This can be
* used if a group is misbehaving or if for some reason a group should no longer
* get any filesystem events.
*/
void fsnotify_evict_group(struct fsnotify_group *group)
{
mutex_lock(&fsnotify_grp_mutex);
__fsnotify_evict_group(group);
mutex_unlock(&fsnotify_grp_mutex);
}
/*
* Drop a reference to a group. Free it if it's through.
*/
void fsnotify_put_group(struct fsnotify_group *group)
{
if (!atomic_dec_and_mutex_lock(&group->refcnt, &fsnotify_grp_mutex))
return;
/*
* OK, now we know that there's no other users *and* we hold mutex,
* so no new references will appear
*/
__fsnotify_evict_group(group);
/*
* now it's off the list, so the only thing we might care about is
* srcu access....
*/
mutex_unlock(&fsnotify_grp_mutex);
synchronize_srcu(&fsnotify_grp_srcu);
/* and now it is really dead. _Nothing_ could be seeing it */
fsnotify_recalc_global_mask();
fsnotify_destroy_group(group);
if (atomic_dec_and_test(&group->refcnt))
fsnotify_destroy_group(group);
}
/*
* Simply run the fsnotify_groups list and find a group which matches
* the given parameters. If a group is found we take a reference to that
* group.
* Create a new fsnotify_group and hold a reference for the group returned.
*/
static struct fsnotify_group *fsnotify_find_group(unsigned int group_num, __u32 mask,
const struct fsnotify_ops *ops)
struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
{
struct fsnotify_group *group_iter;
struct fsnotify_group *group = NULL;
struct fsnotify_group *group;
BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
list_for_each_entry_rcu(group_iter, &fsnotify_groups, group_list) {
if (group_iter->group_num == group_num) {
if ((group_iter->mask == mask) &&
(group_iter->ops == ops)) {
fsnotify_get_group(group_iter);
group = group_iter;
} else
group = ERR_PTR(-EEXIST);
}
}
return group;
}
/*
* Either finds an existing group which matches the group_num, mask, and ops or
* creates a new group and adds it to the global group list. In either case we
* take a reference for the group returned.
*/
struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
const struct fsnotify_ops *ops)
{
struct fsnotify_group *group, *tgroup;
/* very low use, simpler locking if we just always alloc */
group = kmalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
group = kzalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
if (!group)
return ERR_PTR(-ENOMEM);
/* set to 0 when there a no external references to this group */
atomic_set(&group->refcnt, 1);
group->on_group_list = 0;
group->group_num = group_num;
group->mask = mask;
/*
* hits 0 when there are no external references AND no marks for
* this group
*/
atomic_set(&group->num_marks, 1);
mutex_init(&group->notification_mutex);
INIT_LIST_HEAD(&group->notification_list);
init_waitqueue_head(&group->notification_waitq);
group->q_len = 0;
group->max_events = UINT_MAX;
spin_lock_init(&group->mark_lock);
atomic_set(&group->num_marks, 0);
INIT_LIST_HEAD(&group->mark_entries);
INIT_LIST_HEAD(&group->marks_list);
group->ops = ops;
mutex_lock(&fsnotify_grp_mutex);
tgroup = fsnotify_find_group(group_num, mask, ops);
if (tgroup) {
/* group already exists */
mutex_unlock(&fsnotify_grp_mutex);
/* destroy the new one we made */
fsnotify_put_group(group);
return tgroup;
}
/* group not found, add a new one */
list_add_rcu(&group->group_list, &fsnotify_groups);
group->on_group_list = 1;
/* being on the fsnotify_groups list holds one num_marks */
atomic_inc(&group->num_marks);
mutex_unlock(&fsnotify_grp_mutex);
if (mask)
fsnotify_recalc_global_mask();
return group;
}

View file

@ -16,72 +16,6 @@
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* fsnotify inode mark locking/lifetime/and refcnting
*
* REFCNT:
* The mark->refcnt tells how many "things" in the kernel currently are
* referencing this object. The object typically will live inside the kernel
* with a refcnt of 2, one for each list it is on (i_list, g_list). Any task
* which can find this object holding the appropriete locks, can take a reference
* and the object itself is guarenteed to survive until the reference is dropped.
*
* LOCKING:
* There are 3 spinlocks involved with fsnotify inode marks and they MUST
* be taken in order as follows:
*
* entry->lock
* group->mark_lock
* inode->i_lock
*
* entry->lock protects 2 things, entry->group and entry->inode. You must hold
* that lock to dereference either of these things (they could be NULL even with
* the lock)
*
* group->mark_lock protects the mark_entries list anchored inside a given group
* and each entry is hooked via the g_list. It also sorta protects the
* free_g_list, which when used is anchored by a private list on the stack of the
* task which held the group->mark_lock.
*
* inode->i_lock protects the i_fsnotify_mark_entries list anchored inside a
* given inode and each entry is hooked via the i_list. (and sorta the
* free_i_list)
*
*
* LIFETIME:
* Inode marks survive between when they are added to an inode and when their
* refcnt==0.
*
* The inode mark can be cleared for a number of different reasons including:
* - The inode is unlinked for the last time. (fsnotify_inode_remove)
* - The inode is being evicted from cache. (fsnotify_inode_delete)
* - The fs the inode is on is unmounted. (fsnotify_inode_delete/fsnotify_unmount_inodes)
* - Something explicitly requests that it be removed. (fsnotify_destroy_mark_by_entry)
* - The fsnotify_group associated with the mark is going away and all such marks
* need to be cleaned up. (fsnotify_clear_marks_by_group)
*
* Worst case we are given an inode and need to clean up all the marks on that
* inode. We take i_lock and walk the i_fsnotify_mark_entries safely. For each
* mark on the list we take a reference (so the mark can't disappear under us).
* We remove that mark form the inode's list of marks and we add this mark to a
* private list anchored on the stack using i_free_list; At this point we no
* longer fear anything finding the mark using the inode's list of marks.
*
* We can safely and locklessly run the private list on the stack of everything
* we just unattached from the original inode. For each mark on the private list
* we grab the mark-> and can thus dereference mark->group and mark->inode. If
* we see the group and inode are not NULL we take those locks. Now holding all
* 3 locks we can completely remove the mark from other tasks finding it in the
* future. Remember, 10 things might already be referencing this mark, but they
* better be holding a ref. We drop our reference we took before we unhooked it
* from the inode. When the ref hits 0 we can free the mark.
*
* Very similarly for freeing by group, except we use free_g_list.
*
* This has the very interesting property of being able to run concurrently with
* any (or all) other directions.
*/
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
@ -95,30 +29,19 @@
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
void fsnotify_get_mark(struct fsnotify_mark_entry *entry)
{
atomic_inc(&entry->refcnt);
}
void fsnotify_put_mark(struct fsnotify_mark_entry *entry)
{
if (atomic_dec_and_test(&entry->refcnt))
entry->free_mark(entry);
}
/*
* Recalculate the mask of events relevant to a given inode locked.
*/
static void fsnotify_recalc_inode_mask_locked(struct inode *inode)
{
struct fsnotify_mark_entry *entry;
struct fsnotify_mark *mark;
struct hlist_node *pos;
__u32 new_mask = 0;
assert_spin_locked(&inode->i_lock);
hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list)
new_mask |= entry->mask;
hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list)
new_mask |= mark->mask;
inode->i_fsnotify_mask = new_mask;
}
@ -135,107 +58,26 @@ void fsnotify_recalc_inode_mask(struct inode *inode)
__fsnotify_update_child_dentry_flags(inode);
}
/*
* Any time a mark is getting freed we end up here.
* The caller had better be holding a reference to this mark so we don't actually
* do the final put under the entry->lock
*/
void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry)
void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark)
{
struct fsnotify_group *group;
struct inode *inode;
struct inode *inode = mark->i.inode;
spin_lock(&entry->lock);
assert_spin_locked(&mark->lock);
assert_spin_locked(&mark->group->mark_lock);
group = entry->group;
inode = entry->inode;
BUG_ON(group && !inode);
BUG_ON(!group && inode);
/* if !group something else already marked this to die */
if (!group) {
spin_unlock(&entry->lock);
return;
}
/* 1 from caller and 1 for being on i_list/g_list */
BUG_ON(atomic_read(&entry->refcnt) < 2);
spin_lock(&group->mark_lock);
spin_lock(&inode->i_lock);
hlist_del_init(&entry->i_list);
entry->inode = NULL;
list_del_init(&entry->g_list);
entry->group = NULL;
fsnotify_put_mark(entry); /* for i_list and g_list */
hlist_del_init_rcu(&mark->i.i_list);
mark->i.inode = NULL;
/*
* this mark is now off the inode->i_fsnotify_mark_entries list and we
* this mark is now off the inode->i_fsnotify_marks list and we
* hold the inode->i_lock, so this is the perfect time to update the
* inode->i_fsnotify_mask
*/
fsnotify_recalc_inode_mask_locked(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&group->mark_lock);
spin_unlock(&entry->lock);
/*
* Some groups like to know that marks are being freed. This is a
* callback to the group function to let it know that this entry
* is being freed.
*/
if (group->ops->freeing_mark)
group->ops->freeing_mark(entry, group);
/*
* __fsnotify_update_child_dentry_flags(inode);
*
* I really want to call that, but we can't, we have no idea if the inode
* still exists the second we drop the entry->lock.
*
* The next time an event arrive to this inode from one of it's children
* __fsnotify_parent will see that the inode doesn't care about it's
* children and will update all of these flags then. So really this
* is just a lazy update (and could be a perf win...)
*/
iput(inode);
/*
* it's possible that this group tried to destroy itself, but this
* this mark was simultaneously being freed by inode. If that's the
* case, we finish freeing the group here.
*/
if (unlikely(atomic_dec_and_test(&group->num_marks)))
fsnotify_final_destroy_group(group);
}
/*
* Given a group, destroy all of the marks associated with that group.
*/
void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
{
struct fsnotify_mark_entry *lentry, *entry;
LIST_HEAD(free_list);
spin_lock(&group->mark_lock);
list_for_each_entry_safe(entry, lentry, &group->mark_entries, g_list) {
list_add(&entry->free_g_list, &free_list);
list_del_init(&entry->g_list);
fsnotify_get_mark(entry);
}
spin_unlock(&group->mark_lock);
list_for_each_entry_safe(entry, lentry, &free_list, free_g_list) {
fsnotify_destroy_mark_by_entry(entry);
fsnotify_put_mark(entry);
}
}
/*
@ -243,112 +85,145 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
*/
void fsnotify_clear_marks_by_inode(struct inode *inode)
{
struct fsnotify_mark_entry *entry, *lentry;
struct fsnotify_mark *mark, *lmark;
struct hlist_node *pos, *n;
LIST_HEAD(free_list);
spin_lock(&inode->i_lock);
hlist_for_each_entry_safe(entry, pos, n, &inode->i_fsnotify_mark_entries, i_list) {
list_add(&entry->free_i_list, &free_list);
hlist_del_init(&entry->i_list);
fsnotify_get_mark(entry);
hlist_for_each_entry_safe(mark, pos, n, &inode->i_fsnotify_marks, i.i_list) {
list_add(&mark->i.free_i_list, &free_list);
hlist_del_init_rcu(&mark->i.i_list);
fsnotify_get_mark(mark);
}
spin_unlock(&inode->i_lock);
list_for_each_entry_safe(entry, lentry, &free_list, free_i_list) {
fsnotify_destroy_mark_by_entry(entry);
fsnotify_put_mark(entry);
list_for_each_entry_safe(mark, lmark, &free_list, i.free_i_list) {
fsnotify_destroy_mark(mark);
fsnotify_put_mark(mark);
}
}
/*
* Given a group clear all of the inode marks associated with that group.
*/
void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group)
{
fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_INODE);
}
/*
* given a group and inode, find the mark associated with that combination.
* if found take a reference to that mark and return it, else return NULL
*/
struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group,
struct inode *inode)
struct fsnotify_mark *fsnotify_find_inode_mark_locked(struct fsnotify_group *group,
struct inode *inode)
{
struct fsnotify_mark_entry *entry;
struct fsnotify_mark *mark;
struct hlist_node *pos;
assert_spin_locked(&inode->i_lock);
hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list) {
if (entry->group == group) {
fsnotify_get_mark(entry);
return entry;
hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list) {
if (mark->group == group) {
fsnotify_get_mark(mark);
return mark;
}
}
return NULL;
}
/*
* Nothing fancy, just initialize lists and locks and counters.
* given a group and inode, find the mark associated with that combination.
* if found take a reference to that mark and return it, else return NULL
*/
void fsnotify_init_mark(struct fsnotify_mark_entry *entry,
void (*free_mark)(struct fsnotify_mark_entry *entry))
struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group,
struct inode *inode)
{
spin_lock_init(&entry->lock);
atomic_set(&entry->refcnt, 1);
INIT_HLIST_NODE(&entry->i_list);
entry->group = NULL;
entry->mask = 0;
entry->inode = NULL;
entry->free_mark = free_mark;
struct fsnotify_mark *mark;
spin_lock(&inode->i_lock);
mark = fsnotify_find_inode_mark_locked(group, inode);
spin_unlock(&inode->i_lock);
return mark;
}
/*
* Attach an initialized mark entry to a given group and inode.
* These marks may be used for the fsnotify backend to determine which
* event types should be delivered to which group and for which inodes.
* If we are setting a mark mask on an inode mark we should pin the inode
* in memory.
*/
int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
struct fsnotify_group *group, struct inode *inode)
void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark,
__u32 mask)
{
struct fsnotify_mark_entry *lentry;
struct inode *inode;
assert_spin_locked(&mark->lock);
if (mask &&
mark->i.inode &&
!(mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) {
mark->flags |= FSNOTIFY_MARK_FLAG_OBJECT_PINNED;
inode = igrab(mark->i.inode);
/*
* we shouldn't be able to get here if the inode wasn't
* already safely held in memory. But bug in case it
* ever is wrong.
*/
BUG_ON(!inode);
}
}
/*
* Attach an initialized mark to a given inode.
* These marks may be used for the fsnotify backend to determine which
* event types should be delivered to which group and for which inodes. These
* marks are ordered according to the group's location in memory.
*/
int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group, struct inode *inode,
int allow_dups)
{
struct fsnotify_mark *lmark;
struct hlist_node *node, *last = NULL;
int ret = 0;
inode = igrab(inode);
if (unlikely(!inode))
return -EINVAL;
mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
assert_spin_locked(&mark->lock);
assert_spin_locked(&group->mark_lock);
/*
* LOCKING ORDER!!!!
* entry->lock
* group->mark_lock
* inode->i_lock
*/
spin_lock(&entry->lock);
spin_lock(&group->mark_lock);
spin_lock(&inode->i_lock);
lentry = fsnotify_find_mark_entry(group, inode);
if (!lentry) {
entry->group = group;
entry->inode = inode;
mark->i.inode = inode;
hlist_add_head(&entry->i_list, &inode->i_fsnotify_mark_entries);
list_add(&entry->g_list, &group->mark_entries);
fsnotify_get_mark(entry); /* for i_list and g_list */
atomic_inc(&group->num_marks);
fsnotify_recalc_inode_mask_locked(inode);
/* is mark the first mark? */
if (hlist_empty(&inode->i_fsnotify_marks)) {
hlist_add_head_rcu(&mark->i.i_list, &inode->i_fsnotify_marks);
goto out;
}
/* should mark be in the middle of the current list? */
hlist_for_each_entry(lmark, node, &inode->i_fsnotify_marks, i.i_list) {
last = node;
if ((lmark->group == group) && !allow_dups) {
ret = -EEXIST;
goto out;
}
if (mark->group < lmark->group)
continue;
hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
goto out;
}
BUG_ON(last == NULL);
/* mark should be the last entry. last is the current last entry */
hlist_add_after_rcu(last, &mark->i.i_list);
out:
fsnotify_recalc_inode_mask_locked(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&group->mark_lock);
spin_unlock(&entry->lock);
if (lentry) {
ret = -EEXIST;
iput(inode);
fsnotify_put_mark(lentry);
} else {
__fsnotify_update_child_dentry_flags(inode);
}
return ret;
}

View file

@ -1,18 +1,3 @@
config INOTIFY
bool "Inotify file change notification support"
default n
---help---
Say Y here to enable legacy in kernel inotify support. Inotify is a
file change notification system. It is a replacement for dnotify.
This option only provides the legacy inotify in kernel API. There
are no in tree kernel users of this interface since it is deprecated.
You only need this if you are loading an out of tree kernel module
that uses inotify.
For more information, see <file:Documentation/filesystems/inotify.txt>
If unsure, say N.
config INOTIFY_USER
bool "Inotify support for userspace"
select ANON_INODES

View file

@ -1,2 +1 @@
obj-$(CONFIG_INOTIFY) += inotify.o
obj-$(CONFIG_INOTIFY_USER) += inotify_fsnotify.o inotify_user.o

View file

@ -1,872 +0,0 @@
/*
* fs/inotify.c - inode-based file event notifications
*
* Authors:
* John McCutchan <ttb@tentacle.dhs.org>
* Robert Love <rml@novell.com>
*
* Kernel API added by: Amy Griffis <amy.griffis@hp.com>
*
* Copyright (C) 2005 John McCutchan
* Copyright 2006 Hewlett-Packard Development Company, L.P.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/writeback.h>
#include <linux/inotify.h>
#include <linux/fsnotify_backend.h>
static atomic_t inotify_cookie;
/*
* Lock ordering:
*
* dentry->d_lock (used to keep d_move() away from dentry->d_parent)
* iprune_mutex (synchronize shrink_icache_memory())
* inode_lock (protects the super_block->s_inodes list)
* inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
* inotify_handle->mutex (protects inotify_handle and watches->h_list)
*
* The inode->inotify_mutex and inotify_handle->mutex and held during execution
* of a caller's event handler. Thus, the caller must not hold any locks
* taken in their event handler while calling any of the published inotify
* interfaces.
*/
/*
* Lifetimes of the three main data structures--inotify_handle, inode, and
* inotify_watch--are managed by reference count.
*
* inotify_handle: Lifetime is from inotify_init() to inotify_destroy().
* Additional references can bump the count via get_inotify_handle() and drop
* the count via put_inotify_handle().
*
* inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch()
* to remove_watch_no_event(). Additional references can bump the count via
* get_inotify_watch() and drop the count via put_inotify_watch(). The caller
* is reponsible for the final put after receiving IN_IGNORED, or when using
* IN_ONESHOT after receiving the first event. Inotify does the final put if
* inotify_destroy() is called.
*
* inode: Pinned so long as the inode is associated with a watch, from
* inotify_add_watch() to the final put_inotify_watch().
*/
/*
* struct inotify_handle - represents an inotify instance
*
* This structure is protected by the mutex 'mutex'.
*/
struct inotify_handle {
struct idr idr; /* idr mapping wd -> watch */
struct mutex mutex; /* protects this bad boy */
struct list_head watches; /* list of watches */
atomic_t count; /* reference count */
u32 last_wd; /* the last wd allocated */
const struct inotify_operations *in_ops; /* inotify caller operations */
};
static inline void get_inotify_handle(struct inotify_handle *ih)
{
atomic_inc(&ih->count);
}
static inline void put_inotify_handle(struct inotify_handle *ih)
{
if (atomic_dec_and_test(&ih->count)) {
idr_destroy(&ih->idr);
kfree(ih);
}
}
/**
* get_inotify_watch - grab a reference to an inotify_watch
* @watch: watch to grab
*/
void get_inotify_watch(struct inotify_watch *watch)
{
atomic_inc(&watch->count);
}
EXPORT_SYMBOL_GPL(get_inotify_watch);
int pin_inotify_watch(struct inotify_watch *watch)
{
struct super_block *sb = watch->inode->i_sb;
if (atomic_inc_not_zero(&sb->s_active)) {
atomic_inc(&watch->count);
return 1;
}
return 0;
}
/**
* put_inotify_watch - decrements the ref count on a given watch. cleans up
* watch references if the count reaches zero. inotify_watch is freed by
* inotify callers via the destroy_watch() op.
* @watch: watch to release
*/
void put_inotify_watch(struct inotify_watch *watch)
{
if (atomic_dec_and_test(&watch->count)) {
struct inotify_handle *ih = watch->ih;
iput(watch->inode);
ih->in_ops->destroy_watch(watch);
put_inotify_handle(ih);
}
}
EXPORT_SYMBOL_GPL(put_inotify_watch);
void unpin_inotify_watch(struct inotify_watch *watch)
{
struct super_block *sb = watch->inode->i_sb;
put_inotify_watch(watch);
deactivate_super(sb);
}
/*
* inotify_handle_get_wd - returns the next WD for use by the given handle
*
* Callers must hold ih->mutex. This function can sleep.
*/
static int inotify_handle_get_wd(struct inotify_handle *ih,
struct inotify_watch *watch)
{
int ret;
do {
if (unlikely(!idr_pre_get(&ih->idr, GFP_NOFS)))
return -ENOSPC;
ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
} while (ret == -EAGAIN);
if (likely(!ret))
ih->last_wd = watch->wd;
return ret;
}
/*
* inotify_inode_watched - returns nonzero if there are watches on this inode
* and zero otherwise. We call this lockless, we do not care if we race.
*/
static inline int inotify_inode_watched(struct inode *inode)
{
return !list_empty(&inode->inotify_watches);
}
/*
* Get child dentry flag into synch with parent inode.
* Flag should always be clear for negative dentrys.
*/
static void set_dentry_child_flags(struct inode *inode, int watched)
{
struct dentry *alias;
spin_lock(&dcache_lock);
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
struct dentry *child;
list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
if (!child->d_inode)
continue;
spin_lock(&child->d_lock);
if (watched)
child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
else
child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED;
spin_unlock(&child->d_lock);
}
}
spin_unlock(&dcache_lock);
}
/*
* inotify_find_handle - find the watch associated with the given inode and
* handle
*
* Callers must hold inode->inotify_mutex.
*/
static struct inotify_watch *inode_find_handle(struct inode *inode,
struct inotify_handle *ih)
{
struct inotify_watch *watch;
list_for_each_entry(watch, &inode->inotify_watches, i_list) {
if (watch->ih == ih)
return watch;
}
return NULL;
}
/*
* remove_watch_no_event - remove watch without the IN_IGNORED event.
*
* Callers must hold both inode->inotify_mutex and ih->mutex.
*/
static void remove_watch_no_event(struct inotify_watch *watch,
struct inotify_handle *ih)
{
list_del(&watch->i_list);
list_del(&watch->h_list);
if (!inotify_inode_watched(watch->inode))
set_dentry_child_flags(watch->inode, 0);
idr_remove(&ih->idr, watch->wd);
}
/**
* inotify_remove_watch_locked - Remove a watch from both the handle and the
* inode. Sends the IN_IGNORED event signifying that the inode is no longer
* watched. May be invoked from a caller's event handler.
* @ih: inotify handle associated with watch
* @watch: watch to remove
*
* Callers must hold both inode->inotify_mutex and ih->mutex.
*/
void inotify_remove_watch_locked(struct inotify_handle *ih,
struct inotify_watch *watch)
{
remove_watch_no_event(watch, ih);
ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL);
}
EXPORT_SYMBOL_GPL(inotify_remove_watch_locked);
/* Kernel API for producing events */
/*
* inotify_d_instantiate - instantiate dcache entry for inode
*/
void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
{
struct dentry *parent;
if (!inode)
return;
spin_lock(&entry->d_lock);
parent = entry->d_parent;
if (parent->d_inode && inotify_inode_watched(parent->d_inode))
entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
spin_unlock(&entry->d_lock);
}
/*
* inotify_d_move - dcache entry has been moved
*/
void inotify_d_move(struct dentry *entry)
{
struct dentry *parent;
parent = entry->d_parent;
if (inotify_inode_watched(parent->d_inode))
entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
else
entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
}
/**
* inotify_inode_queue_event - queue an event to all watches on this inode
* @inode: inode event is originating from
* @mask: event mask describing this event
* @cookie: cookie for synchronization, or zero
* @name: filename, if any
* @n_inode: inode associated with name
*/
void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
const char *name, struct inode *n_inode)
{
struct inotify_watch *watch, *next;
if (!inotify_inode_watched(inode))
return;
mutex_lock(&inode->inotify_mutex);
list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
u32 watch_mask = watch->mask;
if (watch_mask & mask) {
struct inotify_handle *ih= watch->ih;
mutex_lock(&ih->mutex);
if (watch_mask & IN_ONESHOT)
remove_watch_no_event(watch, ih);
ih->in_ops->handle_event(watch, watch->wd, mask, cookie,
name, n_inode);
mutex_unlock(&ih->mutex);
}
}
mutex_unlock(&inode->inotify_mutex);
}
EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
/**
* inotify_dentry_parent_queue_event - queue an event to a dentry's parent
* @dentry: the dentry in question, we queue against this dentry's parent
* @mask: event mask describing this event
* @cookie: cookie for synchronization, or zero
* @name: filename, if any
*/
void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
u32 cookie, const char *name)
{
struct dentry *parent;
struct inode *inode;
if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
return;
spin_lock(&dentry->d_lock);
parent = dentry->d_parent;
inode = parent->d_inode;
if (inotify_inode_watched(inode)) {
dget(parent);
spin_unlock(&dentry->d_lock);
inotify_inode_queue_event(inode, mask, cookie, name,
dentry->d_inode);
dput(parent);
} else
spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
/**
* inotify_get_cookie - return a unique cookie for use in synchronizing events.
*/
u32 inotify_get_cookie(void)
{
return atomic_inc_return(&inotify_cookie);
}
EXPORT_SYMBOL_GPL(inotify_get_cookie);
/**
* inotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
* @list: list of inodes being unmounted (sb->s_inodes)
*
* Called with inode_lock held, protecting the unmounting super block's list
* of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
* We temporarily drop inode_lock, however, and CAN block.
*/
void inotify_unmount_inodes(struct list_head *list)
{
struct inode *inode, *next_i, *need_iput = NULL;
list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
struct inotify_watch *watch, *next_w;
struct inode *need_iput_tmp;
struct list_head *watches;
/*
* We cannot __iget() an inode in state I_FREEING,
* I_WILL_FREE, or I_NEW which is fine because by that point
* the inode cannot have any associated watches.
*/
if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
continue;
/*
* If i_count is zero, the inode cannot have any watches and
* doing an __iget/iput with MS_ACTIVE clear would actually
* evict all inodes with zero i_count from icache which is
* unnecessarily violent and may in fact be illegal to do.
*/
if (!atomic_read(&inode->i_count))
continue;
need_iput_tmp = need_iput;
need_iput = NULL;
/* In case inotify_remove_watch_locked() drops a reference. */
if (inode != need_iput_tmp)
__iget(inode);
else
need_iput_tmp = NULL;
/* In case the dropping of a reference would nuke next_i. */
if ((&next_i->i_sb_list != list) &&
atomic_read(&next_i->i_count) &&
!(next_i->i_state & (I_FREEING|I_WILL_FREE))) {
__iget(next_i);
need_iput = next_i;
}
/*
* We can safely drop inode_lock here because we hold
* references on both inode and next_i. Also no new inodes
* will be added since the umount has begun. Finally,
* iprune_mutex keeps shrink_icache_memory() away.
*/
spin_unlock(&inode_lock);
if (need_iput_tmp)
iput(need_iput_tmp);
/* for each watch, send IN_UNMOUNT and then remove it */
mutex_lock(&inode->inotify_mutex);
watches = &inode->inotify_watches;
list_for_each_entry_safe(watch, next_w, watches, i_list) {
struct inotify_handle *ih= watch->ih;
get_inotify_watch(watch);
mutex_lock(&ih->mutex);
ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
NULL, NULL);
inotify_remove_watch_locked(ih, watch);
mutex_unlock(&ih->mutex);
put_inotify_watch(watch);
}
mutex_unlock(&inode->inotify_mutex);
iput(inode);
spin_lock(&inode_lock);
}
}
EXPORT_SYMBOL_GPL(inotify_unmount_inodes);
/**
* inotify_inode_is_dead - an inode has been deleted, cleanup any watches
* @inode: inode that is about to be removed
*/
void inotify_inode_is_dead(struct inode *inode)
{
struct inotify_watch *watch, *next;
mutex_lock(&inode->inotify_mutex);
list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
struct inotify_handle *ih = watch->ih;
mutex_lock(&ih->mutex);
inotify_remove_watch_locked(ih, watch);
mutex_unlock(&ih->mutex);
}
mutex_unlock(&inode->inotify_mutex);
}
EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
/* Kernel Consumer API */
/**
* inotify_init - allocate and initialize an inotify instance
* @ops: caller's inotify operations
*/
struct inotify_handle *inotify_init(const struct inotify_operations *ops)
{
struct inotify_handle *ih;
ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL);
if (unlikely(!ih))
return ERR_PTR(-ENOMEM);
idr_init(&ih->idr);
INIT_LIST_HEAD(&ih->watches);
mutex_init(&ih->mutex);
ih->last_wd = 0;
ih->in_ops = ops;
atomic_set(&ih->count, 0);
get_inotify_handle(ih);
return ih;
}
EXPORT_SYMBOL_GPL(inotify_init);
/**
* inotify_init_watch - initialize an inotify watch
* @watch: watch to initialize
*/
void inotify_init_watch(struct inotify_watch *watch)
{
INIT_LIST_HEAD(&watch->h_list);
INIT_LIST_HEAD(&watch->i_list);
atomic_set(&watch->count, 0);
get_inotify_watch(watch); /* initial get */
}
EXPORT_SYMBOL_GPL(inotify_init_watch);
/*
* Watch removals suck violently. To kick the watch out we need (in this
* order) inode->inotify_mutex and ih->mutex. That's fine if we have
* a hold on inode; however, for all other cases we need to make damn sure
* we don't race with umount. We can *NOT* just grab a reference to a
* watch - inotify_unmount_inodes() will happily sail past it and we'll end
* with reference to inode potentially outliving its superblock. Ideally
* we just want to grab an active reference to superblock if we can; that
* will make sure we won't go into inotify_umount_inodes() until we are
* done. Cleanup is just deactivate_super(). However, that leaves a messy
* case - what if we *are* racing with umount() and active references to
* superblock can't be acquired anymore? We can bump ->s_count, grab
* ->s_umount, which will wait until the superblock is shut down and the
* watch in question is pining for fjords.
*
* And yes, this is far beyond mere "not very pretty"; so's the entire
* concept of inotify to start with.
*/
/**
* pin_to_kill - pin the watch down for removal
* @ih: inotify handle
* @watch: watch to kill
*
* Called with ih->mutex held, drops it. Possible return values:
* 0 - nothing to do, it has died
* 1 - remove it, drop the reference and deactivate_super()
*/
static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
{
struct super_block *sb = watch->inode->i_sb;
if (atomic_inc_not_zero(&sb->s_active)) {
get_inotify_watch(watch);
mutex_unlock(&ih->mutex);
return 1; /* the best outcome */
}
spin_lock(&sb_lock);
sb->s_count++;
spin_unlock(&sb_lock);
mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */
down_read(&sb->s_umount);
/* fs is already shut down; the watch is dead */
drop_super(sb);
return 0;
}
static void unpin_and_kill(struct inotify_watch *watch)
{
struct super_block *sb = watch->inode->i_sb;
put_inotify_watch(watch);
deactivate_super(sb);
}
/**
* inotify_destroy - clean up and destroy an inotify instance
* @ih: inotify handle
*/
void inotify_destroy(struct inotify_handle *ih)
{
/*
* Destroy all of the watches for this handle. Unfortunately, not very
* pretty. We cannot do a simple iteration over the list, because we
* do not know the inode until we iterate to the watch. But we need to
* hold inode->inotify_mutex before ih->mutex. The following works.
*
* AV: it had to become even uglier to start working ;-/
*/
while (1) {
struct inotify_watch *watch;
struct list_head *watches;
struct super_block *sb;
struct inode *inode;
mutex_lock(&ih->mutex);
watches = &ih->watches;
if (list_empty(watches)) {
mutex_unlock(&ih->mutex);
break;
}
watch = list_first_entry(watches, struct inotify_watch, h_list);
sb = watch->inode->i_sb;
if (!pin_to_kill(ih, watch))
continue;
inode = watch->inode;
mutex_lock(&inode->inotify_mutex);
mutex_lock(&ih->mutex);
/* make sure we didn't race with another list removal */
if (likely(idr_find(&ih->idr, watch->wd))) {
remove_watch_no_event(watch, ih);
put_inotify_watch(watch);
}
mutex_unlock(&ih->mutex);
mutex_unlock(&inode->inotify_mutex);
unpin_and_kill(watch);
}
/* free this handle: the put matching the get in inotify_init() */
put_inotify_handle(ih);
}
EXPORT_SYMBOL_GPL(inotify_destroy);
/**
* inotify_find_watch - find an existing watch for an (ih,inode) pair
* @ih: inotify handle
* @inode: inode to watch
* @watchp: pointer to existing inotify_watch
*
* Caller must pin given inode (via nameidata).
*/
s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
struct inotify_watch **watchp)
{
struct inotify_watch *old;
int ret = -ENOENT;
mutex_lock(&inode->inotify_mutex);
mutex_lock(&ih->mutex);
old = inode_find_handle(inode, ih);
if (unlikely(old)) {
get_inotify_watch(old); /* caller must put watch */
*watchp = old;
ret = old->wd;
}
mutex_unlock(&ih->mutex);
mutex_unlock(&inode->inotify_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(inotify_find_watch);
/**
* inotify_find_update_watch - find and update the mask of an existing watch
* @ih: inotify handle
* @inode: inode's watch to update
* @mask: mask of events to watch
*
* Caller must pin given inode (via nameidata).
*/
s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode,
u32 mask)
{
struct inotify_watch *old;
int mask_add = 0;
int ret;
if (mask & IN_MASK_ADD)
mask_add = 1;
/* don't allow invalid bits: we don't want flags set */
mask &= IN_ALL_EVENTS | IN_ONESHOT;
if (unlikely(!mask))
return -EINVAL;
mutex_lock(&inode->inotify_mutex);
mutex_lock(&ih->mutex);
/*
* Handle the case of re-adding a watch on an (inode,ih) pair that we
* are already watching. We just update the mask and return its wd.
*/
old = inode_find_handle(inode, ih);
if (unlikely(!old)) {
ret = -ENOENT;
goto out;
}
if (mask_add)
old->mask |= mask;
else
old->mask = mask;
ret = old->wd;
out:
mutex_unlock(&ih->mutex);
mutex_unlock(&inode->inotify_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(inotify_find_update_watch);
/**
* inotify_add_watch - add a watch to an inotify instance
* @ih: inotify handle
* @watch: caller allocated watch structure
* @inode: inode to watch
* @mask: mask of events to watch
*
* Caller must pin given inode (via nameidata).
* Caller must ensure it only calls inotify_add_watch() once per watch.
* Calls inotify_handle_get_wd() so may sleep.
*/
s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
struct inode *inode, u32 mask)
{
int ret = 0;
int newly_watched;
/* don't allow invalid bits: we don't want flags set */
mask &= IN_ALL_EVENTS | IN_ONESHOT;
if (unlikely(!mask))
return -EINVAL;
watch->mask = mask;
mutex_lock(&inode->inotify_mutex);
mutex_lock(&ih->mutex);
/* Initialize a new watch */
ret = inotify_handle_get_wd(ih, watch);
if (unlikely(ret))
goto out;
ret = watch->wd;
/* save a reference to handle and bump the count to make it official */
get_inotify_handle(ih);
watch->ih = ih;
/*
* Save a reference to the inode and bump the ref count to make it
* official. We hold a reference to nameidata, which makes this safe.
*/
watch->inode = igrab(inode);
/* Add the watch to the handle's and the inode's list */
newly_watched = !inotify_inode_watched(inode);
list_add(&watch->h_list, &ih->watches);
list_add(&watch->i_list, &inode->inotify_watches);
/*
* Set child flags _after_ adding the watch, so there is no race
* windows where newly instantiated children could miss their parent's
* watched flag.
*/
if (newly_watched)
set_dentry_child_flags(inode, 1);
out:
mutex_unlock(&ih->mutex);
mutex_unlock(&inode->inotify_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(inotify_add_watch);
/**
* inotify_clone_watch - put the watch next to existing one
* @old: already installed watch
* @new: new watch
*
* Caller must hold the inotify_mutex of inode we are dealing with;
* it is expected to remove the old watch before unlocking the inode.
*/
s32 inotify_clone_watch(struct inotify_watch *old, struct inotify_watch *new)
{
struct inotify_handle *ih = old->ih;
int ret = 0;
new->mask = old->mask;
new->ih = ih;
mutex_lock(&ih->mutex);
/* Initialize a new watch */
ret = inotify_handle_get_wd(ih, new);
if (unlikely(ret))
goto out;
ret = new->wd;
get_inotify_handle(ih);
new->inode = igrab(old->inode);
list_add(&new->h_list, &ih->watches);
list_add(&new->i_list, &old->inode->inotify_watches);
out:
mutex_unlock(&ih->mutex);
return ret;
}
void inotify_evict_watch(struct inotify_watch *watch)
{
get_inotify_watch(watch);
mutex_lock(&watch->ih->mutex);
inotify_remove_watch_locked(watch->ih, watch);
mutex_unlock(&watch->ih->mutex);
}
/**
* inotify_rm_wd - remove a watch from an inotify instance
* @ih: inotify handle
* @wd: watch descriptor to remove
*
* Can sleep.
*/
int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
{
struct inotify_watch *watch;
struct super_block *sb;
struct inode *inode;
mutex_lock(&ih->mutex);
watch = idr_find(&ih->idr, wd);
if (unlikely(!watch)) {
mutex_unlock(&ih->mutex);
return -EINVAL;
}
sb = watch->inode->i_sb;
if (!pin_to_kill(ih, watch))
return 0;
inode = watch->inode;
mutex_lock(&inode->inotify_mutex);
mutex_lock(&ih->mutex);
/* make sure that we did not race */
if (likely(idr_find(&ih->idr, wd) == watch))
inotify_remove_watch_locked(ih, watch);
mutex_unlock(&ih->mutex);
mutex_unlock(&inode->inotify_mutex);
unpin_and_kill(watch);
return 0;
}
EXPORT_SYMBOL_GPL(inotify_rm_wd);
/**
* inotify_rm_watch - remove a watch from an inotify instance
* @ih: inotify handle
* @watch: watch to remove
*
* Can sleep.
*/
int inotify_rm_watch(struct inotify_handle *ih,
struct inotify_watch *watch)
{
return inotify_rm_wd(ih, watch->wd);
}
EXPORT_SYMBOL_GPL(inotify_rm_watch);
/*
* inotify_setup - core initialization function
*/
static int __init inotify_setup(void)
{
BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
BUILD_BUG_ON(IN_OPEN != FS_OPEN);
BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
BUILD_BUG_ON(IN_CREATE != FS_CREATE);
BUILD_BUG_ON(IN_DELETE != FS_DELETE);
BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
atomic_set(&inotify_cookie, 0);
return 0;
}
module_init(inotify_setup);

View file

@ -9,13 +9,12 @@ struct inotify_event_private_data {
int wd;
};
struct inotify_inode_mark_entry {
/* fsnotify_mark_entry MUST be the first thing */
struct fsnotify_mark_entry fsn_entry;
struct inotify_inode_mark {
struct fsnotify_mark fsn_mark;
int wd;
};
extern void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group);
extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);

View file

@ -22,6 +22,7 @@
* General Public License for more details.
*/
#include <linux/dcache.h> /* d_unlinked */
#include <linux/fs.h> /* struct inode */
#include <linux/fsnotify_backend.h>
#include <linux/inotify.h>
@ -32,26 +33,84 @@
#include "inotify.h"
static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
/*
* Check if 2 events contain the same information. We do not compare private data
* but at this moment that isn't a problem for any know fsnotify listeners.
*/
static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
{
struct fsnotify_mark_entry *entry;
struct inotify_inode_mark_entry *ientry;
if ((old->mask == new->mask) &&
(old->to_tell == new->to_tell) &&
(old->data_type == new->data_type) &&
(old->name_len == new->name_len)) {
switch (old->data_type) {
case (FSNOTIFY_EVENT_INODE):
/* remember, after old was put on the wait_q we aren't
* allowed to look at the inode any more, only thing
* left to check was if the file_name is the same */
if (!old->name_len ||
!strcmp(old->file_name, new->file_name))
return true;
break;
case (FSNOTIFY_EVENT_FILE):
if ((old->file->f_path.mnt == new->file->f_path.mnt) &&
(old->file->f_path.dentry == new->file->f_path.dentry))
return true;
break;
case (FSNOTIFY_EVENT_NONE):
if (old->mask & FS_Q_OVERFLOW)
return true;
else if (old->mask & FS_IN_IGNORED)
return false;
return true;
};
}
return false;
}
static struct fsnotify_event *inotify_merge(struct list_head *list,
struct fsnotify_event *event)
{
struct fsnotify_event_holder *last_holder;
struct fsnotify_event *last_event;
/* and the list better be locked by something too */
spin_lock(&event->lock);
last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
last_event = last_holder->event;
if (event_compare(last_event, event))
fsnotify_get_event(last_event);
else
last_event = NULL;
spin_unlock(&event->lock);
return last_event;
}
static int inotify_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
struct fsnotify_event *event)
{
struct inotify_inode_mark *i_mark;
struct inode *to_tell;
struct inotify_event_private_data *event_priv;
struct fsnotify_event_private_data *fsn_event_priv;
int wd, ret;
struct fsnotify_event *added_event;
int wd, ret = 0;
BUG_ON(vfsmount_mark);
pr_debug("%s: group=%p event=%p to_tell=%p mask=%x\n", __func__, group,
event, event->to_tell, event->mask);
to_tell = event->to_tell;
spin_lock(&to_tell->i_lock);
entry = fsnotify_find_mark_entry(group, to_tell);
spin_unlock(&to_tell->i_lock);
/* race with watch removal? We already passes should_send */
if (unlikely(!entry))
return 0;
ientry = container_of(entry, struct inotify_inode_mark_entry,
fsn_entry);
wd = ientry->wd;
i_mark = container_of(inode_mark, struct inotify_inode_mark,
fsn_mark);
wd = i_mark->wd;
event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
if (unlikely(!event_priv))
@ -62,48 +121,40 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
fsn_event_priv->group = group;
event_priv->wd = wd;
ret = fsnotify_add_notify_event(group, event, fsn_event_priv);
if (ret) {
added_event = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge);
if (added_event) {
inotify_free_event_priv(fsn_event_priv);
/* EEXIST says we tail matched, EOVERFLOW isn't something
* to report up the stack. */
if ((ret == -EEXIST) ||
(ret == -EOVERFLOW))
ret = 0;
if (!IS_ERR(added_event))
fsnotify_put_event(added_event);
else
ret = PTR_ERR(added_event);
}
/*
* If we hold the entry until after the event is on the queue
* IN_IGNORED won't be able to pass this event in the queue
*/
fsnotify_put_mark(entry);
if (inode_mark->mask & IN_ONESHOT)
fsnotify_destroy_mark(inode_mark);
return ret;
}
static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group)
static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group)
{
inotify_ignored_and_remove_idr(entry, group);
inotify_ignored_and_remove_idr(fsn_mark, group);
}
static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask)
static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
__u32 mask, void *data, int data_type)
{
struct fsnotify_mark_entry *entry;
bool send;
if ((inode_mark->mask & FS_EXCL_UNLINK) &&
(data_type == FSNOTIFY_EVENT_FILE)) {
struct file *file = data;
spin_lock(&inode->i_lock);
entry = fsnotify_find_mark_entry(group, inode);
spin_unlock(&inode->i_lock);
if (!entry)
return false;
if (d_unlinked(file->f_path.dentry))
return false;
}
mask = (mask & ~FS_EVENT_ON_CHILD);
send = (entry->mask & mask);
/* find took a reference */
fsnotify_put_mark(entry);
return send;
return true;
}
/*
@ -115,18 +166,18 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
*/
static int idr_callback(int id, void *p, void *data)
{
struct fsnotify_mark_entry *entry;
struct inotify_inode_mark_entry *ientry;
struct fsnotify_mark *fsn_mark;
struct inotify_inode_mark *i_mark;
static bool warned = false;
if (warned)
return 0;
warned = true;
entry = p;
ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
fsn_mark = p;
i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
WARN(1, "inotify closing but id=%d for entry=%p in group=%p still in "
WARN(1, "inotify closing but id=%d for fsn_mark=%p in group=%p still in "
"idr. Probably leaking memory\n", id, p, data);
/*
@ -135,9 +186,9 @@ static int idr_callback(int id, void *p, void *data)
* out why we got here and the panic is no worse than the original
* BUG() that was here.
*/
if (entry)
printk(KERN_WARNING "entry->group=%p inode=%p wd=%d\n",
entry->group, entry->inode, ientry->wd);
if (fsn_mark)
printk(KERN_WARNING "fsn_mark->group=%p inode=%p wd=%d\n",
fsn_mark->group, fsn_mark->i.inode, i_mark->wd);
return 0;
}

View file

@ -46,17 +46,11 @@
/* these are configurable via /proc/sys/fs/inotify/ */
static int inotify_max_user_instances __read_mostly;
static int inotify_max_queued_events __read_mostly;
int inotify_max_user_watches __read_mostly;
static int inotify_max_user_watches __read_mostly;
static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
struct kmem_cache *event_priv_cachep __read_mostly;
/*
* When inotify registers a new group it increments this and uses that
* value as an offset to set the fsnotify group "name" and priority.
*/
static atomic_t inotify_grp_num;
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
@ -96,11 +90,14 @@ static inline __u32 inotify_arg_to_mask(u32 arg)
{
__u32 mask;
/* everything should accept their own ignored and cares about children */
mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD);
/*
* everything should accept their own ignored, cares about children,
* and should receive events when the inode is unmounted
*/
mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT);
/* mask off the flags used to open the fd */
mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT));
mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK));
return mask;
}
@ -144,6 +141,8 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
event = fsnotify_peek_notify_event(group);
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
if (event->name_len)
event_size += roundup(event->name_len + 1, event_size);
@ -173,6 +172,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
size_t event_size = sizeof(struct inotify_event);
size_t name_len = 0;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
/* we get the inotify watch descriptor from the event private data */
spin_lock(&event->lock);
fsn_priv = fsnotify_remove_priv_from_event(group, event);
@ -245,6 +246,8 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
kevent = get_one_event(group, count);
mutex_unlock(&group->notification_mutex);
pr_debug("%s: group=%p kevent=%p\n", __func__, group, kevent);
if (kevent) {
ret = PTR_ERR(kevent);
if (IS_ERR(kevent))
@ -289,6 +292,8 @@ static int inotify_release(struct inode *ignored, struct file *file)
struct fsnotify_group *group = file->private_data;
struct user_struct *user = group->inotify_data.user;
pr_debug("%s: group=%p\n", __func__, group);
fsnotify_clear_marks_by_group(group);
/* free this group, matching get was inotify_init->fsnotify_obtain_group */
@ -312,6 +317,8 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
group = file->private_data;
p = (void __user *) arg;
pr_debug("%s: group=%p cmd=%u\n", __func__, group, cmd);
switch (cmd) {
case FIONREAD:
mutex_lock(&group->notification_mutex);
@ -357,59 +364,159 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
return error;
}
static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock,
int *last_wd,
struct inotify_inode_mark *i_mark)
{
int ret;
do {
if (unlikely(!idr_pre_get(idr, GFP_KERNEL)))
return -ENOMEM;
spin_lock(idr_lock);
ret = idr_get_new_above(idr, i_mark, *last_wd + 1,
&i_mark->wd);
/* we added the mark to the idr, take a reference */
if (!ret) {
*last_wd = i_mark->wd;
fsnotify_get_mark(&i_mark->fsn_mark);
}
spin_unlock(idr_lock);
} while (ret == -EAGAIN);
return ret;
}
static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group *group,
int wd)
{
struct idr *idr = &group->inotify_data.idr;
spinlock_t *idr_lock = &group->inotify_data.idr_lock;
struct inotify_inode_mark *i_mark;
assert_spin_locked(idr_lock);
i_mark = idr_find(idr, wd);
if (i_mark) {
struct fsnotify_mark *fsn_mark = &i_mark->fsn_mark;
fsnotify_get_mark(fsn_mark);
/* One ref for being in the idr, one ref we just took */
BUG_ON(atomic_read(&fsn_mark->refcnt) < 2);
}
return i_mark;
}
static struct inotify_inode_mark *inotify_idr_find(struct fsnotify_group *group,
int wd)
{
struct inotify_inode_mark *i_mark;
spinlock_t *idr_lock = &group->inotify_data.idr_lock;
spin_lock(idr_lock);
i_mark = inotify_idr_find_locked(group, wd);
spin_unlock(idr_lock);
return i_mark;
}
static void do_inotify_remove_from_idr(struct fsnotify_group *group,
struct inotify_inode_mark *i_mark)
{
struct idr *idr = &group->inotify_data.idr;
spinlock_t *idr_lock = &group->inotify_data.idr_lock;
int wd = i_mark->wd;
assert_spin_locked(idr_lock);
idr_remove(idr, wd);
/* removed from the idr, drop that ref */
fsnotify_put_mark(&i_mark->fsn_mark);
}
/*
* Remove the mark from the idr (if present) and drop the reference
* on the mark because it was in the idr.
*/
static void inotify_remove_from_idr(struct fsnotify_group *group,
struct inotify_inode_mark_entry *ientry)
struct inotify_inode_mark *i_mark)
{
struct idr *idr;
struct fsnotify_mark_entry *entry;
struct inotify_inode_mark_entry *found_ientry;
spinlock_t *idr_lock = &group->inotify_data.idr_lock;
struct inotify_inode_mark *found_i_mark = NULL;
int wd;
spin_lock(&group->inotify_data.idr_lock);
idr = &group->inotify_data.idr;
wd = ientry->wd;
spin_lock(idr_lock);
wd = i_mark->wd;
if (wd == -1)
goto out;
entry = idr_find(&group->inotify_data.idr, wd);
if (unlikely(!entry))
goto out;
found_ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
if (unlikely(found_ientry != ientry)) {
/* We found an entry in the idr with the right wd, but it's
* not the entry we were told to remove. eparis seriously
* fucked up somewhere. */
WARN_ON(1);
ientry->wd = -1;
/*
* does this i_mark think it is in the idr? we shouldn't get called
* if it wasn't....
*/
if (wd == -1) {
WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
" i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode);
goto out;
}
/* One ref for being in the idr, one ref held by the caller */
BUG_ON(atomic_read(&entry->refcnt) < 2);
/* Lets look in the idr to see if we find it */
found_i_mark = inotify_idr_find_locked(group, wd);
if (unlikely(!found_i_mark)) {
WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
" i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode);
goto out;
}
idr_remove(idr, wd);
ientry->wd = -1;
/*
* We found an mark in the idr at the right wd, but it's
* not the mark we were told to remove. eparis seriously
* fucked up somewhere.
*/
if (unlikely(found_i_mark != i_mark)) {
WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p "
"mark->inode=%p found_i_mark=%p found_i_mark->wd=%d "
"found_i_mark->group=%p found_i_mark->inode=%p\n",
__func__, i_mark, i_mark->wd, i_mark->fsn_mark.group,
i_mark->fsn_mark.i.inode, found_i_mark, found_i_mark->wd,
found_i_mark->fsn_mark.group,
found_i_mark->fsn_mark.i.inode);
goto out;
}
/* removed from the idr, drop that ref */
fsnotify_put_mark(entry);
/*
* One ref for being in the idr
* one ref held by the caller trying to kill us
* one ref grabbed by inotify_idr_find
*/
if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 3)) {
printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
" i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode);
/* we can't really recover with bad ref cnting.. */
BUG();
}
do_inotify_remove_from_idr(group, i_mark);
out:
spin_unlock(&group->inotify_data.idr_lock);
/* match the ref taken by inotify_idr_find_locked() */
if (found_i_mark)
fsnotify_put_mark(&found_i_mark->fsn_mark);
i_mark->wd = -1;
spin_unlock(idr_lock);
}
/*
* Send IN_IGNORED for this wd, remove this wd from the idr.
*/
void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group)
{
struct inotify_inode_mark_entry *ientry;
struct fsnotify_event *ignored_event;
struct inotify_inode_mark *i_mark;
struct fsnotify_event *ignored_event, *notify_event;
struct inotify_event_private_data *event_priv;
struct fsnotify_event_private_data *fsn_event_priv;
int ret;
@ -420,7 +527,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
if (!ignored_event)
return;
ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS);
if (unlikely(!event_priv))
@ -429,37 +536,44 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
fsn_event_priv = &event_priv->fsnotify_event_priv_data;
fsn_event_priv->group = group;
event_priv->wd = ientry->wd;
event_priv->wd = i_mark->wd;
ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv);
if (ret)
notify_event = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL);
if (notify_event) {
if (IS_ERR(notify_event))
ret = PTR_ERR(notify_event);
else
fsnotify_put_event(notify_event);
inotify_free_event_priv(fsn_event_priv);
}
skip_send_ignore:
/* matches the reference taken when the event was created */
fsnotify_put_event(ignored_event);
/* remove this entry from the idr */
inotify_remove_from_idr(group, ientry);
/* remove this mark from the idr */
inotify_remove_from_idr(group, i_mark);
atomic_dec(&group->inotify_data.user->inotify_watches);
}
/* ding dong the mark is dead */
static void inotify_free_mark(struct fsnotify_mark_entry *entry)
static void inotify_free_mark(struct fsnotify_mark *fsn_mark)
{
struct inotify_inode_mark_entry *ientry = (struct inotify_inode_mark_entry *)entry;
struct inotify_inode_mark *i_mark;
kmem_cache_free(inotify_inode_mark_cachep, ientry);
i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
kmem_cache_free(inotify_inode_mark_cachep, i_mark);
}
static int inotify_update_existing_watch(struct fsnotify_group *group,
struct inode *inode,
u32 arg)
{
struct fsnotify_mark_entry *entry;
struct inotify_inode_mark_entry *ientry;
struct fsnotify_mark *fsn_mark;
struct inotify_inode_mark *i_mark;
__u32 old_mask, new_mask;
__u32 mask;
int add = (arg & IN_MASK_ADD);
@ -467,52 +581,43 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
/* don't allow invalid bits: we don't want flags set */
mask = inotify_arg_to_mask(arg);
if (unlikely(!mask))
if (unlikely(!(mask & IN_ALL_EVENTS)))
return -EINVAL;
spin_lock(&inode->i_lock);
entry = fsnotify_find_mark_entry(group, inode);
spin_unlock(&inode->i_lock);
if (!entry)
fsn_mark = fsnotify_find_inode_mark(group, inode);
if (!fsn_mark)
return -ENOENT;
ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
spin_lock(&entry->lock);
spin_lock(&fsn_mark->lock);
old_mask = entry->mask;
if (add) {
entry->mask |= mask;
new_mask = entry->mask;
} else {
entry->mask = mask;
new_mask = entry->mask;
}
old_mask = fsn_mark->mask;
if (add)
fsnotify_set_mark_mask_locked(fsn_mark, (fsn_mark->mask | mask));
else
fsnotify_set_mark_mask_locked(fsn_mark, mask);
new_mask = fsn_mark->mask;
spin_unlock(&entry->lock);
spin_unlock(&fsn_mark->lock);
if (old_mask != new_mask) {
/* more bits in old than in new? */
int dropped = (old_mask & ~new_mask);
/* more bits in this entry than the inode's mask? */
/* more bits in this fsn_mark than the inode's mask? */
int do_inode = (new_mask & ~inode->i_fsnotify_mask);
/* more bits in this entry than the group? */
int do_group = (new_mask & ~group->mask);
/* update the inode with this new entry */
/* update the inode with this new fsn_mark */
if (dropped || do_inode)
fsnotify_recalc_inode_mask(inode);
/* update the group mask with the new mask */
if (dropped || do_group)
fsnotify_recalc_group_mask(group);
}
/* return the wd */
ret = ientry->wd;
ret = i_mark->wd;
/* match the get from fsnotify_find_mark_entry() */
fsnotify_put_mark(entry);
/* match the get from fsnotify_find_mark() */
fsnotify_put_mark(fsn_mark);
return ret;
}
@ -521,73 +626,51 @@ static int inotify_new_watch(struct fsnotify_group *group,
struct inode *inode,
u32 arg)
{
struct inotify_inode_mark_entry *tmp_ientry;
struct inotify_inode_mark *tmp_i_mark;
__u32 mask;
int ret;
struct idr *idr = &group->inotify_data.idr;
spinlock_t *idr_lock = &group->inotify_data.idr_lock;
/* don't allow invalid bits: we don't want flags set */
mask = inotify_arg_to_mask(arg);
if (unlikely(!mask))
if (unlikely(!(mask & IN_ALL_EVENTS)))
return -EINVAL;
tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
if (unlikely(!tmp_ientry))
tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
if (unlikely(!tmp_i_mark))
return -ENOMEM;
fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark);
tmp_ientry->fsn_entry.mask = mask;
tmp_ientry->wd = -1;
fsnotify_init_mark(&tmp_i_mark->fsn_mark, inotify_free_mark);
tmp_i_mark->fsn_mark.mask = mask;
tmp_i_mark->wd = -1;
ret = -ENOSPC;
if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
goto out_err;
retry:
ret = -ENOMEM;
if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
ret = inotify_add_to_idr(idr, idr_lock, &group->inotify_data.last_wd,
tmp_i_mark);
if (ret)
goto out_err;
/* we are putting the mark on the idr, take a reference */
fsnotify_get_mark(&tmp_ientry->fsn_entry);
spin_lock(&group->inotify_data.idr_lock);
ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
group->inotify_data.last_wd+1,
&tmp_ientry->wd);
spin_unlock(&group->inotify_data.idr_lock);
if (ret) {
/* we didn't get on the idr, drop the idr reference */
fsnotify_put_mark(&tmp_ientry->fsn_entry);
/* idr was out of memory allocate and try again */
if (ret == -EAGAIN)
goto retry;
goto out_err;
}
/* we are on the idr, now get on the inode */
ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
ret = fsnotify_add_mark(&tmp_i_mark->fsn_mark, group, inode, NULL, 0);
if (ret) {
/* we failed to get on the inode, get off the idr */
inotify_remove_from_idr(group, tmp_ientry);
inotify_remove_from_idr(group, tmp_i_mark);
goto out_err;
}
/* update the idr hint, who cares about races, it's just a hint */
group->inotify_data.last_wd = tmp_ientry->wd;
/* increment the number of watches the user has */
atomic_inc(&group->inotify_data.user->inotify_watches);
/* return the watch descriptor for this new entry */
ret = tmp_ientry->wd;
/* if this mark added a new event update the group mask */
if (mask & ~group->mask)
fsnotify_recalc_group_mask(group);
/* return the watch descriptor for this new mark */
ret = tmp_i_mark->wd;
out_err:
/* match the ref from fsnotify_init_markentry() */
fsnotify_put_mark(&tmp_ientry->fsn_entry);
/* match the ref from fsnotify_init_mark() */
fsnotify_put_mark(&tmp_i_mark->fsn_mark);
return ret;
}
@ -616,11 +699,8 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod
static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events)
{
struct fsnotify_group *group;
unsigned int grp_num;
/* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
grp_num = (INOTIFY_GROUP_NUM - atomic_inc_return(&inotify_grp_num));
group = fsnotify_obtain_group(grp_num, 0, &inotify_fsnotify_ops);
group = fsnotify_alloc_group(&inotify_fsnotify_ops);
if (IS_ERR(group))
return group;
@ -726,7 +806,7 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
{
struct fsnotify_group *group;
struct fsnotify_mark_entry *entry;
struct inotify_inode_mark *i_mark;
struct file *filp;
int ret = 0, fput_needed;
@ -735,25 +815,23 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
return -EBADF;
/* verify that this is indeed an inotify instance */
if (unlikely(filp->f_op != &inotify_fops)) {
ret = -EINVAL;
ret = -EINVAL;
if (unlikely(filp->f_op != &inotify_fops))
goto out;
}
group = filp->private_data;
spin_lock(&group->inotify_data.idr_lock);
entry = idr_find(&group->inotify_data.idr, wd);
if (unlikely(!entry)) {
spin_unlock(&group->inotify_data.idr_lock);
ret = -EINVAL;
ret = -EINVAL;
i_mark = inotify_idr_find(group, wd);
if (unlikely(!i_mark))
goto out;
}
fsnotify_get_mark(entry);
spin_unlock(&group->inotify_data.idr_lock);
fsnotify_destroy_mark_by_entry(entry);
fsnotify_put_mark(entry);
ret = 0;
fsnotify_destroy_mark(&i_mark->fsn_mark);
/* match ref taken by inotify_idr_find */
fsnotify_put_mark(&i_mark->fsn_mark);
out:
fput_light(filp, fput_needed);
@ -767,7 +845,28 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
*/
static int __init inotify_user_setup(void)
{
inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC);
BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
BUILD_BUG_ON(IN_OPEN != FS_OPEN);
BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
BUILD_BUG_ON(IN_CREATE != FS_CREATE);
BUILD_BUG_ON(IN_DELETE != FS_DELETE);
BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
inotify_max_queued_events = 16384;

371
fs/notify/mark.c Normal file
View file

@ -0,0 +1,371 @@
/*
* Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* fsnotify inode mark locking/lifetime/and refcnting
*
* REFCNT:
* The mark->refcnt tells how many "things" in the kernel currently are
* referencing this object. The object typically will live inside the kernel
* with a refcnt of 2, one for each list it is on (i_list, g_list). Any task
* which can find this object holding the appropriete locks, can take a reference
* and the object itself is guarenteed to survive until the reference is dropped.
*
* LOCKING:
* There are 3 spinlocks involved with fsnotify inode marks and they MUST
* be taken in order as follows:
*
* mark->lock
* group->mark_lock
* inode->i_lock
*
* mark->lock protects 2 things, mark->group and mark->inode. You must hold
* that lock to dereference either of these things (they could be NULL even with
* the lock)
*
* group->mark_lock protects the marks_list anchored inside a given group
* and each mark is hooked via the g_list. It also sorta protects the
* free_g_list, which when used is anchored by a private list on the stack of the
* task which held the group->mark_lock.
*
* inode->i_lock protects the i_fsnotify_marks list anchored inside a
* given inode and each mark is hooked via the i_list. (and sorta the
* free_i_list)
*
*
* LIFETIME:
* Inode marks survive between when they are added to an inode and when their
* refcnt==0.
*
* The inode mark can be cleared for a number of different reasons including:
* - The inode is unlinked for the last time. (fsnotify_inode_remove)
* - The inode is being evicted from cache. (fsnotify_inode_delete)
* - The fs the inode is on is unmounted. (fsnotify_inode_delete/fsnotify_unmount_inodes)
* - Something explicitly requests that it be removed. (fsnotify_destroy_mark)
* - The fsnotify_group associated with the mark is going away and all such marks
* need to be cleaned up. (fsnotify_clear_marks_by_group)
*
* Worst case we are given an inode and need to clean up all the marks on that
* inode. We take i_lock and walk the i_fsnotify_marks safely. For each
* mark on the list we take a reference (so the mark can't disappear under us).
* We remove that mark form the inode's list of marks and we add this mark to a
* private list anchored on the stack using i_free_list; At this point we no
* longer fear anything finding the mark using the inode's list of marks.
*
* We can safely and locklessly run the private list on the stack of everything
* we just unattached from the original inode. For each mark on the private list
* we grab the mark-> and can thus dereference mark->group and mark->inode. If
* we see the group and inode are not NULL we take those locks. Now holding all
* 3 locks we can completely remove the mark from other tasks finding it in the
* future. Remember, 10 things might already be referencing this mark, but they
* better be holding a ref. We drop our reference we took before we unhooked it
* from the inode. When the ref hits 0 we can free the mark.
*
* Very similarly for freeing by group, except we use free_g_list.
*
* This has the very interesting property of being able to run concurrently with
* any (or all) other directions.
*/
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>
#include <linux/writeback.h> /* for inode_lock */
#include <asm/atomic.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
struct srcu_struct fsnotify_mark_srcu;
static DEFINE_SPINLOCK(destroy_lock);
static LIST_HEAD(destroy_list);
static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq);
void fsnotify_get_mark(struct fsnotify_mark *mark)
{
atomic_inc(&mark->refcnt);
}
void fsnotify_put_mark(struct fsnotify_mark *mark)
{
if (atomic_dec_and_test(&mark->refcnt))
mark->free_mark(mark);
}
/*
* Any time a mark is getting freed we end up here.
* The caller had better be holding a reference to this mark so we don't actually
* do the final put under the mark->lock
*/
void fsnotify_destroy_mark(struct fsnotify_mark *mark)
{
struct fsnotify_group *group;
struct inode *inode = NULL;
spin_lock(&mark->lock);
group = mark->group;
/* something else already called this function on this mark */
if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
spin_unlock(&mark->lock);
return;
}
mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
/* 1 from caller and 1 for being on i_list/g_list */
BUG_ON(atomic_read(&mark->refcnt) < 2);
spin_lock(&group->mark_lock);
if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
inode = mark->i.inode;
fsnotify_destroy_inode_mark(mark);
} else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT)
fsnotify_destroy_vfsmount_mark(mark);
else
BUG();
list_del_init(&mark->g_list);
spin_unlock(&group->mark_lock);
spin_unlock(&mark->lock);
spin_lock(&destroy_lock);
list_add(&mark->destroy_list, &destroy_list);
spin_unlock(&destroy_lock);
wake_up(&destroy_waitq);
/*
* Some groups like to know that marks are being freed. This is a
* callback to the group function to let it know that this mark
* is being freed.
*/
if (group->ops->freeing_mark)
group->ops->freeing_mark(mark, group);
/*
* __fsnotify_update_child_dentry_flags(inode);
*
* I really want to call that, but we can't, we have no idea if the inode
* still exists the second we drop the mark->lock.
*
* The next time an event arrive to this inode from one of it's children
* __fsnotify_parent will see that the inode doesn't care about it's
* children and will update all of these flags then. So really this
* is just a lazy update (and could be a perf win...)
*/
if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
iput(inode);
/*
* it's possible that this group tried to destroy itself, but this
* this mark was simultaneously being freed by inode. If that's the
* case, we finish freeing the group here.
*/
if (unlikely(atomic_dec_and_test(&group->num_marks)))
fsnotify_final_destroy_group(group);
}
void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
{
assert_spin_locked(&mark->lock);
mark->mask = mask;
if (mark->flags & FSNOTIFY_MARK_FLAG_INODE)
fsnotify_set_inode_mark_mask_locked(mark, mask);
}
void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask)
{
assert_spin_locked(&mark->lock);
mark->ignored_mask = mask;
}
/*
* Attach an initialized mark to a given group and fs object.
* These marks may be used for the fsnotify backend to determine which
* event types should be delivered to which group.
*/
int fsnotify_add_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group, struct inode *inode,
struct vfsmount *mnt, int allow_dups)
{
int ret = 0;
BUG_ON(inode && mnt);
BUG_ON(!inode && !mnt);
/*
* LOCKING ORDER!!!!
* mark->lock
* group->mark_lock
* inode->i_lock
*/
spin_lock(&mark->lock);
spin_lock(&group->mark_lock);
mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE;
mark->group = group;
list_add(&mark->g_list, &group->marks_list);
atomic_inc(&group->num_marks);
fsnotify_get_mark(mark); /* for i_list and g_list */
if (inode) {
ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups);
if (ret)
goto err;
} else if (mnt) {
ret = fsnotify_add_vfsmount_mark(mark, group, mnt, allow_dups);
if (ret)
goto err;
} else {
BUG();
}
spin_unlock(&group->mark_lock);
/* this will pin the object if appropriate */
fsnotify_set_mark_mask_locked(mark, mark->mask);
spin_unlock(&mark->lock);
if (inode)
__fsnotify_update_child_dentry_flags(inode);
return ret;
err:
mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
list_del_init(&mark->g_list);
mark->group = NULL;
atomic_dec(&group->num_marks);
spin_unlock(&group->mark_lock);
spin_unlock(&mark->lock);
spin_lock(&destroy_lock);
list_add(&mark->destroy_list, &destroy_list);
spin_unlock(&destroy_lock);
wake_up(&destroy_waitq);
return ret;
}
/*
* clear any marks in a group in which mark->flags & flags is true
*/
void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
unsigned int flags)
{
struct fsnotify_mark *lmark, *mark;
LIST_HEAD(free_list);
spin_lock(&group->mark_lock);
list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
if (mark->flags & flags) {
list_add(&mark->free_g_list, &free_list);
list_del_init(&mark->g_list);
fsnotify_get_mark(mark);
}
}
spin_unlock(&group->mark_lock);
list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) {
fsnotify_destroy_mark(mark);
fsnotify_put_mark(mark);
}
}
/*
* Given a group, destroy all of the marks associated with that group.
*/
void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
{
fsnotify_clear_marks_by_group_flags(group, (unsigned int)-1);
}
void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old)
{
assert_spin_locked(&old->lock);
new->i.inode = old->i.inode;
new->m.mnt = old->m.mnt;
new->group = old->group;
new->mask = old->mask;
new->free_mark = old->free_mark;
}
/*
* Nothing fancy, just initialize lists and locks and counters.
*/
void fsnotify_init_mark(struct fsnotify_mark *mark,
void (*free_mark)(struct fsnotify_mark *mark))
{
memset(mark, 0, sizeof(*mark));
spin_lock_init(&mark->lock);
atomic_set(&mark->refcnt, 1);
mark->free_mark = free_mark;
}
static int fsnotify_mark_destroy(void *ignored)
{
struct fsnotify_mark *mark, *next;
LIST_HEAD(private_destroy_list);
for (;;) {
spin_lock(&destroy_lock);
/* exchange the list head */
list_replace_init(&destroy_list, &private_destroy_list);
spin_unlock(&destroy_lock);
synchronize_srcu(&fsnotify_mark_srcu);
list_for_each_entry_safe(mark, next, &private_destroy_list, destroy_list) {
list_del_init(&mark->destroy_list);
fsnotify_put_mark(mark);
}
wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list));
}
return 0;
}
static int __init fsnotify_mark_init(void)
{
struct task_struct *thread;
thread = kthread_run(fsnotify_mark_destroy, NULL,
"fsnotify_mark");
if (IS_ERR(thread))
panic("unable to start fsnotify mark destruction thread.");
return 0;
}
device_initcall(fsnotify_mark_init);

View file

@ -31,6 +31,7 @@
* allocated and used.
*/
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
@ -56,7 +57,7 @@ static struct kmem_cache *fsnotify_event_holder_cachep;
* it is needed. It's refcnt is set 1 at kernel init time and will never
* get set to 0 so it will never get 'freed'
*/
static struct fsnotify_event q_overflow_event;
static struct fsnotify_event *q_overflow_event;
static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0);
/**
@ -87,12 +88,15 @@ void fsnotify_put_event(struct fsnotify_event *event)
return;
if (atomic_dec_and_test(&event->refcnt)) {
if (event->data_type == FSNOTIFY_EVENT_PATH)
path_put(&event->path);
pr_debug("%s: event=%p\n", __func__, event);
if (event->data_type == FSNOTIFY_EVENT_FILE)
fput(event->file);
BUG_ON(!list_empty(&event->private_data_list));
kfree(event->file_name);
put_pid(event->tgid);
kmem_cache_free(fsnotify_event_cachep, event);
}
}
@ -104,7 +108,8 @@ struct fsnotify_event_holder *fsnotify_alloc_event_holder(void)
void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder)
{
kmem_cache_free(fsnotify_event_holder_cachep, holder);
if (holder)
kmem_cache_free(fsnotify_event_holder_cachep, holder);
}
/*
@ -128,54 +133,21 @@ struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnot
return priv;
}
/*
* Check if 2 events contain the same information. We do not compare private data
* but at this moment that isn't a problem for any know fsnotify listeners.
*/
static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
{
if ((old->mask == new->mask) &&
(old->to_tell == new->to_tell) &&
(old->data_type == new->data_type) &&
(old->name_len == new->name_len)) {
switch (old->data_type) {
case (FSNOTIFY_EVENT_INODE):
/* remember, after old was put on the wait_q we aren't
* allowed to look at the inode any more, only thing
* left to check was if the file_name is the same */
if (!old->name_len ||
!strcmp(old->file_name, new->file_name))
return true;
break;
case (FSNOTIFY_EVENT_PATH):
if ((old->path.mnt == new->path.mnt) &&
(old->path.dentry == new->path.dentry))
return true;
break;
case (FSNOTIFY_EVENT_NONE):
if (old->mask & FS_Q_OVERFLOW)
return true;
else if (old->mask & FS_IN_IGNORED)
return false;
return false;
};
}
return false;
}
/*
* Add an event to the group notification queue. The group can later pull this
* event off the queue to deal with. If the event is successfully added to the
* group's notification queue, a reference is taken on event.
*/
int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
struct fsnotify_event_private_data *priv)
struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
struct fsnotify_event_private_data *priv,
struct fsnotify_event *(*merge)(struct list_head *,
struct fsnotify_event *))
{
struct fsnotify_event *return_event = NULL;
struct fsnotify_event_holder *holder = NULL;
struct list_head *list = &group->notification_list;
struct fsnotify_event_holder *last_holder;
struct fsnotify_event *last_event;
int ret = 0;
pr_debug("%s: group=%p event=%p priv=%p\n", __func__, group, event, priv);
/*
* There is one fsnotify_event_holder embedded inside each fsnotify_event.
@ -189,18 +161,40 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_even
alloc_holder:
holder = fsnotify_alloc_event_holder();
if (!holder)
return -ENOMEM;
return ERR_PTR(-ENOMEM);
}
mutex_lock(&group->notification_mutex);
if (group->q_len >= group->max_events) {
event = &q_overflow_event;
ret = -EOVERFLOW;
event = q_overflow_event;
/*
* we need to return the overflow event
* which means we need a ref
*/
fsnotify_get_event(event);
return_event = event;
/* sorry, no private data on the overflow event */
priv = NULL;
}
if (!list_empty(list) && merge) {
struct fsnotify_event *tmp;
tmp = merge(list, event);
if (tmp) {
mutex_unlock(&group->notification_mutex);
if (return_event)
fsnotify_put_event(return_event);
if (holder != &event->holder)
fsnotify_destroy_event_holder(holder);
return tmp;
}
}
spin_lock(&event->lock);
if (list_empty(&event->holder.event_list)) {
@ -212,19 +206,13 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_even
* event holder was used, go back and get a new one */
spin_unlock(&event->lock);
mutex_unlock(&group->notification_mutex);
goto alloc_holder;
}
if (!list_empty(list)) {
last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
last_event = last_holder->event;
if (event_compare(last_event, event)) {
spin_unlock(&event->lock);
mutex_unlock(&group->notification_mutex);
if (holder != &event->holder)
fsnotify_destroy_event_holder(holder);
return -EEXIST;
if (return_event) {
fsnotify_put_event(return_event);
return_event = NULL;
}
goto alloc_holder;
}
group->q_len++;
@ -238,7 +226,7 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_even
mutex_unlock(&group->notification_mutex);
wake_up(&group->notification_waitq);
return ret;
return return_event;
}
/*
@ -253,6 +241,8 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
BUG_ON(!mutex_is_locked(&group->notification_mutex));
pr_debug("%s: group=%p\n", __func__, group);
holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
event = holder->event;
@ -314,25 +304,82 @@ void fsnotify_flush_notify(struct fsnotify_group *group)
static void initialize_event(struct fsnotify_event *event)
{
event->holder.event = NULL;
INIT_LIST_HEAD(&event->holder.event_list);
atomic_set(&event->refcnt, 1);
spin_lock_init(&event->lock);
event->path.dentry = NULL;
event->path.mnt = NULL;
event->inode = NULL;
event->data_type = FSNOTIFY_EVENT_NONE;
INIT_LIST_HEAD(&event->private_data_list);
}
event->to_tell = NULL;
/*
* Caller damn well better be holding whatever mutex is protecting the
* old_holder->event_list and the new_event must be a clean event which
* cannot be found anywhere else in the kernel.
*/
int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
struct fsnotify_event *new_event)
{
struct fsnotify_event *old_event = old_holder->event;
struct fsnotify_event_holder *new_holder = &new_event->holder;
event->file_name = NULL;
event->name_len = 0;
enum event_spinlock_class {
SPINLOCK_OLD,
SPINLOCK_NEW,
};
event->sync_cookie = 0;
pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, new_event);
/*
* if the new_event's embedded holder is in use someone
* screwed up and didn't give us a clean new event.
*/
BUG_ON(!list_empty(&new_holder->event_list));
spin_lock_nested(&old_event->lock, SPINLOCK_OLD);
spin_lock_nested(&new_event->lock, SPINLOCK_NEW);
new_holder->event = new_event;
list_replace_init(&old_holder->event_list, &new_holder->event_list);
spin_unlock(&new_event->lock);
spin_unlock(&old_event->lock);
/* event == holder means we are referenced through the in event holder */
if (old_holder != &old_event->holder)
fsnotify_destroy_event_holder(old_holder);
fsnotify_get_event(new_event); /* on the list take reference */
fsnotify_put_event(old_event); /* off the list, drop reference */
return 0;
}
struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event)
{
struct fsnotify_event *event;
event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
if (!event)
return NULL;
pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, event);
memcpy(event, old_event, sizeof(*event));
initialize_event(event);
if (event->name_len) {
event->file_name = kstrdup(old_event->file_name, GFP_KERNEL);
if (!event->file_name) {
kmem_cache_free(fsnotify_event_cachep, event);
return NULL;
}
}
event->tgid = get_pid(old_event->tgid);
if (event->data_type == FSNOTIFY_EVENT_FILE)
get_file(event->file);
return event;
}
/*
@ -348,15 +395,18 @@ static void initialize_event(struct fsnotify_event *event)
* @name the filename, if available
*/
struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data,
int data_type, const char *name, u32 cookie,
gfp_t gfp)
int data_type, const unsigned char *name,
u32 cookie, gfp_t gfp)
{
struct fsnotify_event *event;
event = kmem_cache_alloc(fsnotify_event_cachep, gfp);
event = kmem_cache_zalloc(fsnotify_event_cachep, gfp);
if (!event)
return NULL;
pr_debug("%s: event=%p to_tell=%p mask=%x data=%p data_type=%d\n",
__func__, event, to_tell, mask, data, data_type);
initialize_event(event);
if (name) {
@ -368,35 +418,36 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
event->name_len = strlen(event->file_name);
}
event->tgid = get_pid(task_tgid(current));
event->sync_cookie = cookie;
event->to_tell = to_tell;
event->data_type = data_type;
switch (data_type) {
case FSNOTIFY_EVENT_FILE: {
struct file *file = data;
struct path *path = &file->f_path;
event->path.dentry = path->dentry;
event->path.mnt = path->mnt;
path_get(&event->path);
event->data_type = FSNOTIFY_EVENT_PATH;
break;
}
case FSNOTIFY_EVENT_PATH: {
struct path *path = data;
event->path.dentry = path->dentry;
event->path.mnt = path->mnt;
path_get(&event->path);
event->data_type = FSNOTIFY_EVENT_PATH;
event->file = data;
/*
* if this file is about to disappear hold an extra reference
* until we return to __fput so we don't have to worry about
* future get/put destroying the file under us or generating
* additional events. Notice that we change f_mode without
* holding f_lock. This is safe since this is the only possible
* reference to this object in the kernel (it was about to be
* freed, remember?)
*/
if (!atomic_long_read(&event->file->f_count)) {
event->file->f_mode |= FMODE_NONOTIFY;
get_file(event->file);
}
get_file(event->file);
break;
}
case FSNOTIFY_EVENT_INODE:
event->inode = data;
event->data_type = FSNOTIFY_EVENT_INODE;
break;
case FSNOTIFY_EVENT_NONE:
event->inode = NULL;
event->path.dentry = NULL;
event->path.mnt = NULL;
event->file = NULL;
break;
default:
BUG();
@ -412,8 +463,11 @@ __init int fsnotify_notification_init(void)
fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC);
initialize_event(&q_overflow_event);
q_overflow_event.mask = FS_Q_OVERFLOW;
q_overflow_event = fsnotify_create_event(NULL, FS_Q_OVERFLOW, NULL,
FSNOTIFY_EVENT_NONE, NULL, 0,
GFP_KERNEL);
if (!q_overflow_event)
panic("unable to allocate fsnotify q_overflow_event\n");
return 0;
}

187
fs/notify/vfsmount_mark.c Normal file
View file

@ -0,0 +1,187 @@
/*
* Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/writeback.h> /* for inode_lock */
#include <asm/atomic.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
{
struct fsnotify_mark *mark, *lmark;
struct hlist_node *pos, *n;
LIST_HEAD(free_list);
spin_lock(&mnt->mnt_root->d_lock);
hlist_for_each_entry_safe(mark, pos, n, &mnt->mnt_fsnotify_marks, m.m_list) {
list_add(&mark->m.free_m_list, &free_list);
hlist_del_init_rcu(&mark->m.m_list);
fsnotify_get_mark(mark);
}
spin_unlock(&mnt->mnt_root->d_lock);
list_for_each_entry_safe(mark, lmark, &free_list, m.free_m_list) {
fsnotify_destroy_mark(mark);
fsnotify_put_mark(mark);
}
}
void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group)
{
fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_VFSMOUNT);
}
/*
* Recalculate the mask of events relevant to a given vfsmount locked.
*/
static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt)
{
struct fsnotify_mark *mark;
struct hlist_node *pos;
__u32 new_mask = 0;
assert_spin_locked(&mnt->mnt_root->d_lock);
hlist_for_each_entry(mark, pos, &mnt->mnt_fsnotify_marks, m.m_list)
new_mask |= mark->mask;
mnt->mnt_fsnotify_mask = new_mask;
}
/*
* Recalculate the mnt->mnt_fsnotify_mask, or the mask of all FS_* event types
* any notifier is interested in hearing for this mount point
*/
void fsnotify_recalc_vfsmount_mask(struct vfsmount *mnt)
{
spin_lock(&mnt->mnt_root->d_lock);
fsnotify_recalc_vfsmount_mask_locked(mnt);
spin_unlock(&mnt->mnt_root->d_lock);
}
void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark)
{
struct vfsmount *mnt = mark->m.mnt;
assert_spin_locked(&mark->lock);
assert_spin_locked(&mark->group->mark_lock);
spin_lock(&mnt->mnt_root->d_lock);
hlist_del_init_rcu(&mark->m.m_list);
mark->m.mnt = NULL;
fsnotify_recalc_vfsmount_mask_locked(mnt);
spin_unlock(&mnt->mnt_root->d_lock);
}
static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_group *group,
struct vfsmount *mnt)
{
struct fsnotify_mark *mark;
struct hlist_node *pos;
assert_spin_locked(&mnt->mnt_root->d_lock);
hlist_for_each_entry(mark, pos, &mnt->mnt_fsnotify_marks, m.m_list) {
if (mark->group == group) {
fsnotify_get_mark(mark);
return mark;
}
}
return NULL;
}
/*
* given a group and vfsmount, find the mark associated with that combination.
* if found take a reference to that mark and return it, else return NULL
*/
struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group,
struct vfsmount *mnt)
{
struct fsnotify_mark *mark;
spin_lock(&mnt->mnt_root->d_lock);
mark = fsnotify_find_vfsmount_mark_locked(group, mnt);
spin_unlock(&mnt->mnt_root->d_lock);
return mark;
}
/*
* Attach an initialized mark to a given group and vfsmount.
* These marks may be used for the fsnotify backend to determine which
* event types should be delivered to which groups.
*/
int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group, struct vfsmount *mnt,
int allow_dups)
{
struct fsnotify_mark *lmark;
struct hlist_node *node, *last = NULL;
int ret = 0;
mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
assert_spin_locked(&mark->lock);
assert_spin_locked(&group->mark_lock);
spin_lock(&mnt->mnt_root->d_lock);
mark->m.mnt = mnt;
/* is mark the first mark? */
if (hlist_empty(&mnt->mnt_fsnotify_marks)) {
hlist_add_head_rcu(&mark->m.m_list, &mnt->mnt_fsnotify_marks);
goto out;
}
/* should mark be in the middle of the current list? */
hlist_for_each_entry(lmark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
last = node;
if ((lmark->group == group) && !allow_dups) {
ret = -EEXIST;
goto out;
}
if (mark->group < lmark->group)
continue;
hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
goto out;
}
BUG_ON(last == NULL);
/* mark should be the last entry. last is the current last entry */
hlist_add_after_rcu(last, &mark->m.m_list);
out:
fsnotify_recalc_vfsmount_mask_locked(mnt);
spin_unlock(&mnt->mnt_root->d_lock);
return ret;
}

View file

@ -29,6 +29,7 @@
#include <linux/falloc.h>
#include <linux/fs_struct.h>
#include <linux/ima.h>
#include <linux/dnotify.h>
#include "internal.h"
@ -887,7 +888,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
put_unused_fd(fd);
fd = PTR_ERR(f);
} else {
fsnotify_open(f->f_path.dentry);
fsnotify_open(f);
fd_install(fd, f);
}
}

View file

@ -311,7 +311,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
else
ret = do_sync_read(file, buf, count, pos);
if (ret > 0) {
fsnotify_access(file->f_path.dentry);
fsnotify_access(file);
add_rchar(current, ret);
}
inc_syscr(current);
@ -367,7 +367,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
else
ret = do_sync_write(file, buf, count, pos);
if (ret > 0) {
fsnotify_modify(file->f_path.dentry);
fsnotify_modify(file);
add_wchar(current, ret);
}
inc_syscw(current);
@ -675,9 +675,9 @@ static ssize_t do_readv_writev(int type, struct file *file,
kfree(iov);
if ((ret + (type == READ)) > 0) {
if (type == READ)
fsnotify_access(file->f_path.dentry);
fsnotify_access(file);
else
fsnotify_modify(file->f_path.dentry);
fsnotify_modify(file);
}
return ret;
}

View file

@ -3,6 +3,14 @@
#include <linux/types.h>
/*
* FMODE_EXEC is 0x20
* FMODE_NONOTIFY is 0x1000000
* These cannot be used by userspace O_* until internal and external open
* flags are split.
* -Eric Paris
*/
#define O_ACCMODE 00000003
#define O_RDONLY 00000000
#define O_WRONLY 00000001

View file

@ -210,6 +210,7 @@ unifdef-y += ethtool.h
unifdef-y += eventpoll.h
unifdef-y += signalfd.h
unifdef-y += ext2_fs.h
unifdef-y += fanotify.h
unifdef-y += fb.h
unifdef-y += fcntl.h
unifdef-y += filter.h

View file

@ -28,6 +28,7 @@ struct dnotify_struct {
FS_CREATE | FS_DN_RENAME |\
FS_MOVED_FROM | FS_MOVED_TO)
extern int dir_notify_enable;
extern void dnotify_flush(struct file *, fl_owner_t);
extern int fcntl_dirnotify(int, struct file *, unsigned long);

105
include/linux/fanotify.h Normal file
View file

@ -0,0 +1,105 @@
#ifndef _LINUX_FANOTIFY_H
#define _LINUX_FANOTIFY_H
#include <linux/types.h>
/* the following events that user-space can register for */
#define FAN_ACCESS 0x00000001 /* File was accessed */
#define FAN_MODIFY 0x00000002 /* File was modified */
#define FAN_CLOSE_WRITE 0x00000008 /* Unwrittable file closed */
#define FAN_CLOSE_NOWRITE 0x00000010 /* Writtable file closed */
#define FAN_OPEN 0x00000020 /* File was opened */
#define FAN_EVENT_ON_CHILD 0x08000000 /* interested in child events */
/* FIXME currently Q's have no limit.... */
#define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */
#define FAN_OPEN_PERM 0x00010000 /* File open in perm check */
#define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */
/* helper events */
#define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */
/* flags used for fanotify_init() */
#define FAN_CLOEXEC 0x00000001
#define FAN_NONBLOCK 0x00000002
#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK)
/* flags used for fanotify_modify_mark() */
#define FAN_MARK_ADD 0x00000001
#define FAN_MARK_REMOVE 0x00000002
#define FAN_MARK_DONT_FOLLOW 0x00000004
#define FAN_MARK_ONLYDIR 0x00000008
#define FAN_MARK_MOUNT 0x00000010
#define FAN_MARK_IGNORED_MASK 0x00000020
#define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040
#define FAN_MARK_FLUSH 0x00000080
#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\
FAN_MARK_REMOVE |\
FAN_MARK_DONT_FOLLOW |\
FAN_MARK_ONLYDIR |\
FAN_MARK_MOUNT |\
FAN_MARK_IGNORED_MASK |\
FAN_MARK_IGNORED_SURV_MODIFY)
/*
* All of the events - we build the list by hand so that we can add flags in
* the future and not break backward compatibility. Apps will get only the
* events that they originally wanted. Be sure to add new events here!
*/
#define FAN_ALL_EVENTS (FAN_ACCESS |\
FAN_MODIFY |\
FAN_CLOSE |\
FAN_OPEN)
/*
* All events which require a permission response from userspace
*/
#define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\
FAN_ACCESS_PERM)
#define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\
FAN_ALL_PERM_EVENTS |\
FAN_Q_OVERFLOW)
#define FANOTIFY_METADATA_VERSION 1
struct fanotify_event_metadata {
__u32 event_len;
__u32 vers;
__s32 fd;
__u64 mask;
__s64 pid;
} __attribute__ ((packed));
struct fanotify_response {
__s32 fd;
__u32 response;
} __attribute__ ((packed));
/* Legit userspace responses to a _PERM event */
#define FAN_ALLOW 0x01
#define FAN_DENY 0x02
/* Helper functions to deal with fanotify_event_metadata buffers */
#define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata))
#define FAN_EVENT_NEXT(meta, len) ((len) -= (meta)->event_len, \
(struct fanotify_event_metadata*)(((char *)(meta)) + \
(meta)->event_len))
#define FAN_EVENT_OK(meta, len) ((long)(len) >= (long)FAN_EVENT_METADATA_LEN && \
(long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \
(long)(meta)->event_len <= (long)(len))
#ifdef __KERNEL__
struct fanotify_wait {
struct fsnotify_event *event;
__s32 fd;
};
#endif /* __KERNEL__ */
#endif /* _LINUX_FANOTIFY_H */

View file

@ -91,6 +91,9 @@ struct inodes_stat_t {
/* Expect random access pattern */
#define FMODE_RANDOM ((__force fmode_t)0x1000)
/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)16777216) /* 0x1000000 */
/*
* The below are the various read and write types that we support. Some of
* them include behavioral modifiers that send information down to the
@ -409,9 +412,6 @@ extern int get_max_files(void);
extern int sysctl_nr_open;
extern struct inodes_stat_t inodes_stat;
extern int leases_enable, lease_break_time;
#ifdef CONFIG_DNOTIFY
extern int dir_notify_enable;
#endif
struct buffer_head;
typedef int (get_block_t)(struct inode *inode, sector_t iblock,
@ -772,12 +772,7 @@ struct inode {
#ifdef CONFIG_FSNOTIFY
__u32 i_fsnotify_mask; /* all events this inode cares about */
struct hlist_head i_fsnotify_mark_entries; /* fsnotify mark entries */
#endif
#ifdef CONFIG_INOTIFY
struct list_head inotify_watches; /* watches on this inode */
struct mutex inotify_mutex; /* protects the watches list */
struct hlist_head i_fsnotify_marks;
#endif
unsigned long i_state;
@ -2484,7 +2479,8 @@ int proc_nr_files(struct ctl_table *table, int write,
int __init get_filesystem_list(char *buf);
#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
#define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE))
#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \
(flag & FMODE_NONOTIFY)))
#endif /* __KERNEL__ */
#endif /* _LINUX_FS_H */

View file

@ -11,8 +11,6 @@
* (C) Copyright 2005 Robert Love
*/
#include <linux/dnotify.h>
#include <linux/inotify.h>
#include <linux/fsnotify_backend.h>
#include <linux/audit.h>
#include <linux/slab.h>
@ -21,35 +19,52 @@
* fsnotify_d_instantiate - instantiate a dentry for inode
* Called with dcache_lock held.
*/
static inline void fsnotify_d_instantiate(struct dentry *entry,
struct inode *inode)
static inline void fsnotify_d_instantiate(struct dentry *dentry,
struct inode *inode)
{
__fsnotify_d_instantiate(entry, inode);
inotify_d_instantiate(entry, inode);
__fsnotify_d_instantiate(dentry, inode);
}
/* Notify this dentry's parent about a child's events. */
static inline void fsnotify_parent(struct dentry *dentry, __u32 mask)
static inline void fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask)
{
__fsnotify_parent(dentry, mask);
if (!dentry)
dentry = file->f_path.dentry;
inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
__fsnotify_parent(file, dentry, mask);
}
/* simple call site for access decisions */
static inline int fsnotify_perm(struct file *file, int mask)
{
struct inode *inode = file->f_path.dentry->d_inode;
__u32 fsnotify_mask = 0;
if (file->f_mode & FMODE_NONOTIFY)
return 0;
if (!(mask & (MAY_READ | MAY_OPEN)))
return 0;
if (mask & MAY_OPEN)
fsnotify_mask = FS_OPEN_PERM;
else if (mask & MAY_READ)
fsnotify_mask = FS_ACCESS_PERM;
else
BUG();
return fsnotify(inode, fsnotify_mask, file, FSNOTIFY_EVENT_FILE, NULL, 0);
}
/*
* fsnotify_d_move - entry has been moved
* Called with dcache_lock and entry->d_lock held.
* fsnotify_d_move - dentry has been moved
* Called with dcache_lock and dentry->d_lock held.
*/
static inline void fsnotify_d_move(struct dentry *entry)
static inline void fsnotify_d_move(struct dentry *dentry)
{
/*
* On move we need to update entry->d_flags to indicate if the new parent
* cares about events from this entry.
* On move we need to update dentry->d_flags to indicate if the new parent
* cares about events from this dentry.
*/
__fsnotify_update_dcache_flags(entry);
inotify_d_move(entry);
__fsnotify_update_dcache_flags(dentry);
}
/*
@ -57,8 +72,6 @@ static inline void fsnotify_d_move(struct dentry *entry)
*/
static inline void fsnotify_link_count(struct inode *inode)
{
inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL);
fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
}
@ -66,45 +79,31 @@ static inline void fsnotify_link_count(struct inode *inode)
* fsnotify_move - file old_name at old_dir was moved to new_name at new_dir
*/
static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
const char *old_name,
const unsigned char *old_name,
int isdir, struct inode *target, struct dentry *moved)
{
struct inode *source = moved->d_inode;
u32 in_cookie = inotify_get_cookie();
u32 fs_cookie = fsnotify_get_cookie();
__u32 old_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_FROM);
__u32 new_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_TO);
const char *new_name = moved->d_name.name;
const unsigned char *new_name = moved->d_name.name;
if (old_dir == new_dir)
old_dir_mask |= FS_DN_RENAME;
if (isdir) {
isdir = IN_ISDIR;
old_dir_mask |= FS_IN_ISDIR;
new_dir_mask |= FS_IN_ISDIR;
}
inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir, in_cookie, old_name,
source);
inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, in_cookie, new_name,
source);
fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name, fs_cookie);
fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name, fs_cookie);
if (target) {
inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL);
inotify_inode_is_dead(target);
/* this is really a link_count change not a removal */
if (target)
fsnotify_link_count(target);
}
if (source) {
inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL);
if (source)
fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
}
audit_inode_child(moved, new_dir);
}
@ -116,6 +115,14 @@ static inline void fsnotify_inode_delete(struct inode *inode)
__fsnotify_inode_delete(inode);
}
/*
* fsnotify_vfsmount_delete - a vfsmount is being destroyed, clean up is needed
*/
static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt)
{
__fsnotify_vfsmount_delete(mnt);
}
/*
* fsnotify_nameremove - a filename was removed from a directory
*/
@ -126,7 +133,7 @@ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir)
if (isdir)
mask |= FS_IN_ISDIR;
fsnotify_parent(dentry, mask);
fsnotify_parent(NULL, dentry, mask);
}
/*
@ -134,9 +141,6 @@ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir)
*/
static inline void fsnotify_inoderemove(struct inode *inode)
{
inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL);
inotify_inode_is_dead(inode);
fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
__fsnotify_inode_delete(inode);
}
@ -146,8 +150,6 @@ static inline void fsnotify_inoderemove(struct inode *inode)
*/
static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
{
inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name,
dentry->d_inode);
audit_inode_child(dentry, inode);
fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0);
@ -160,8 +162,6 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
*/
static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct dentry *new_dentry)
{
inotify_inode_queue_event(dir, IN_CREATE, 0, new_dentry->d_name.name,
inode);
fsnotify_link_count(inode);
audit_inode_child(new_dentry, dir);
@ -176,7 +176,6 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
__u32 mask = (FS_CREATE | FS_IN_ISDIR);
struct inode *d_inode = dentry->d_inode;
inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode);
audit_inode_child(dentry, inode);
fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0);
@ -185,52 +184,52 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
/*
* fsnotify_access - file was read
*/
static inline void fsnotify_access(struct dentry *dentry)
static inline void fsnotify_access(struct file *file)
{
struct inode *inode = dentry->d_inode;
struct inode *inode = file->f_path.dentry->d_inode;
__u32 mask = FS_ACCESS;
if (S_ISDIR(inode->i_mode))
mask |= FS_IN_ISDIR;
inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
fsnotify_parent(dentry, mask);
fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
if (!(file->f_mode & FMODE_NONOTIFY)) {
fsnotify_parent(file, NULL, mask);
fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL, 0);
}
}
/*
* fsnotify_modify - file was modified
*/
static inline void fsnotify_modify(struct dentry *dentry)
static inline void fsnotify_modify(struct file *file)
{
struct inode *inode = dentry->d_inode;
struct inode *inode = file->f_path.dentry->d_inode;
__u32 mask = FS_MODIFY;
if (S_ISDIR(inode->i_mode))
mask |= FS_IN_ISDIR;
inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
fsnotify_parent(dentry, mask);
fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
if (!(file->f_mode & FMODE_NONOTIFY)) {
fsnotify_parent(file, NULL, mask);
fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL, 0);
}
}
/*
* fsnotify_open - file was opened
*/
static inline void fsnotify_open(struct dentry *dentry)
static inline void fsnotify_open(struct file *file)
{
struct inode *inode = dentry->d_inode;
struct inode *inode = file->f_path.dentry->d_inode;
__u32 mask = FS_OPEN;
if (S_ISDIR(inode->i_mode))
mask |= FS_IN_ISDIR;
inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
fsnotify_parent(dentry, mask);
fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
if (!(file->f_mode & FMODE_NONOTIFY)) {
fsnotify_parent(file, NULL, mask);
fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL, 0);
}
}
/*
@ -238,18 +237,17 @@ static inline void fsnotify_open(struct dentry *dentry)
*/
static inline void fsnotify_close(struct file *file)
{
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
struct inode *inode = file->f_path.dentry->d_inode;
fmode_t mode = file->f_mode;
__u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE;
if (S_ISDIR(inode->i_mode))
mask |= FS_IN_ISDIR;
inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
fsnotify_parent(dentry, mask);
fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL, 0);
if (!(file->f_mode & FMODE_NONOTIFY)) {
fsnotify_parent(file, NULL, mask);
fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL, 0);
}
}
/*
@ -263,9 +261,7 @@ static inline void fsnotify_xattr(struct dentry *dentry)
if (S_ISDIR(inode->i_mode))
mask |= FS_IN_ISDIR;
inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
fsnotify_parent(dentry, mask);
fsnotify_parent(NULL, dentry, mask);
fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
}
@ -299,19 +295,18 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
if (mask) {
if (S_ISDIR(inode->i_mode))
mask |= FS_IN_ISDIR;
inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
fsnotify_parent(dentry, mask);
fsnotify_parent(NULL, dentry, mask);
fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
}
}
#if defined(CONFIG_INOTIFY) || defined(CONFIG_FSNOTIFY) /* notify helpers */
#if defined(CONFIG_FSNOTIFY) /* notify helpers */
/*
* fsnotify_oldname_init - save off the old filename before we change it
*/
static inline const char *fsnotify_oldname_init(const char *name)
static inline const unsigned char *fsnotify_oldname_init(const unsigned char *name)
{
return kstrdup(name, GFP_KERNEL);
}
@ -319,22 +314,22 @@ static inline const char *fsnotify_oldname_init(const char *name)
/*
* fsnotify_oldname_free - free the name we got from fsnotify_oldname_init
*/
static inline void fsnotify_oldname_free(const char *old_name)
static inline void fsnotify_oldname_free(const unsigned char *old_name)
{
kfree(old_name);
}
#else /* CONFIG_INOTIFY || CONFIG_FSNOTIFY */
#else /* CONFIG_FSNOTIFY */
static inline const char *fsnotify_oldname_init(const char *name)
static inline const char *fsnotify_oldname_init(const unsigned char *name)
{
return NULL;
}
static inline void fsnotify_oldname_free(const char *old_name)
static inline void fsnotify_oldname_free(const unsigned char *old_name)
{
}
#endif /* ! CONFIG_INOTIFY */
#endif /* CONFIG_FSNOTIFY */
#endif /* _LINUX_FS_NOTIFY_H */

View file

@ -41,6 +41,10 @@
#define FS_Q_OVERFLOW 0x00004000 /* Event queued overflowed */
#define FS_IN_IGNORED 0x00008000 /* last inotify event here */
#define FS_OPEN_PERM 0x00010000 /* open event in an permission hook */
#define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */
#define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */
#define FS_IN_ISDIR 0x40000000 /* event occurred against dir */
#define FS_IN_ONESHOT 0x80000000 /* only send event once */
@ -58,13 +62,20 @@
FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\
FS_DELETE)
/* listeners that hard code group numbers near the top */
#define DNOTIFY_GROUP_NUM UINT_MAX
#define INOTIFY_GROUP_NUM (DNOTIFY_GROUP_NUM-1)
#define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO)
#define ALL_FSNOTIFY_EVENTS (FS_ACCESS | FS_MODIFY | FS_ATTRIB | \
FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN | \
FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE | \
FS_DELETE | FS_DELETE_SELF | FS_MOVE_SELF | \
FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \
FS_OPEN_PERM | FS_ACCESS_PERM | FS_EXCL_UNLINK | \
FS_IN_ISDIR | FS_IN_ONESHOT | FS_DN_RENAME | \
FS_DN_MULTISHOT | FS_EVENT_ON_CHILD)
struct fsnotify_group;
struct fsnotify_event;
struct fsnotify_mark_entry;
struct fsnotify_mark;
struct fsnotify_event_private_data;
/*
@ -80,10 +91,16 @@ struct fsnotify_event_private_data;
* valid group and inode to use to clean up.
*/
struct fsnotify_ops {
bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode, __u32 mask);
int (*handle_event)(struct fsnotify_group *group, struct fsnotify_event *event);
bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
__u32 mask, void *data, int data_type);
int (*handle_event)(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
struct fsnotify_event *event);
void (*free_group_priv)(struct fsnotify_group *group);
void (*freeing_mark)(struct fsnotify_mark_entry *entry, struct fsnotify_group *group);
void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group);
void (*free_event_priv)(struct fsnotify_event_private_data *priv);
};
@ -94,22 +111,6 @@ struct fsnotify_ops {
* everything will be cleaned up.
*/
struct fsnotify_group {
/*
* global list of all groups receiving events from fsnotify.
* anchored by fsnotify_groups and protected by either fsnotify_grp_mutex
* or fsnotify_grp_srcu depending on write vs read.
*/
struct list_head group_list;
/*
* Defines all of the event types in which this group is interested.
* This mask is a bitwise OR of the FS_* events from above. Each time
* this mask changes for a group (if it changes) the correct functions
* must be called to update the global structures which indicate global
* interest in event types.
*/
__u32 mask;
/*
* How the refcnt is used is up to each group. When the refcnt hits 0
* fsnotify will clean up all of the resources associated with this group.
@ -119,7 +120,6 @@ struct fsnotify_group {
* closed.
*/
atomic_t refcnt; /* things with interest in this group */
unsigned int group_num; /* simply prevents accidental group collision */
const struct fsnotify_ops *ops; /* how this group handles things */
@ -130,15 +130,12 @@ struct fsnotify_group {
unsigned int q_len; /* events on the queue */
unsigned int max_events; /* maximum events allowed on the list */
/* stores all fastapth entries assoc with this group so they can be cleaned on unregister */
spinlock_t mark_lock; /* protect mark_entries list */
atomic_t num_marks; /* 1 for each mark entry and 1 for not being
/* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
spinlock_t mark_lock; /* protect marks_list */
atomic_t num_marks; /* 1 for each mark and 1 for not being
* past the point of no return when freeing
* a group */
struct list_head mark_entries; /* all inode mark entries for this group */
/* prevents double list_del of group_list. protected by global fsnotify_grp_mutex */
bool on_group_list;
struct list_head marks_list; /* all inode marks for this group */
/* groups can define private fields here or use the void *private */
union {
@ -152,6 +149,17 @@ struct fsnotify_group {
struct user_struct *user;
} inotify_data;
#endif
#ifdef CONFIG_FANOTIFY
struct fanotify_group_private_data {
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
/* allows a group to block waiting for a userspace response */
struct mutex access_mutex;
struct list_head access_list;
wait_queue_head_t access_waitq;
#endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
int f_flags;
} fanotify_data;
#endif /* CONFIG_FANOTIFY */
};
};
@ -195,35 +203,57 @@ struct fsnotify_event {
/* to_tell may ONLY be dereferenced during handle_event(). */
struct inode *to_tell; /* either the inode the event happened to or its parent */
/*
* depending on the event type we should have either a path or inode
* We hold a reference on path, but NOT on inode. Since we have the ref on
* the path, it may be dereferenced at any point during this object's
* depending on the event type we should have either a file or inode
* We hold a reference on file, but NOT on inode. Since we have the ref on
* the file, it may be dereferenced at any point during this object's
* lifetime. That reference is dropped when this object's refcnt hits
* 0. If this event contains an inode instead of a path, the inode may
* 0. If this event contains an inode instead of a file, the inode may
* ONLY be used during handle_event().
*/
union {
struct path path;
struct file *file;
struct inode *inode;
};
/* when calling fsnotify tell it if the data is a path or inode */
#define FSNOTIFY_EVENT_NONE 0
#define FSNOTIFY_EVENT_PATH 1
#define FSNOTIFY_EVENT_FILE 1
#define FSNOTIFY_EVENT_INODE 2
#define FSNOTIFY_EVENT_FILE 3
int data_type; /* which of the above union we have */
atomic_t refcnt; /* how many groups still are using/need to send this event */
__u32 mask; /* the type of access, bitwise OR for FS_* event types */
u32 sync_cookie; /* used to corrolate events, namely inotify mv events */
char *file_name;
const unsigned char *file_name;
size_t name_len;
struct pid *tgid;
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
__u32 response; /* userspace answer to question */
#endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
struct list_head private_data_list; /* groups can store private data here */
};
/*
* a mark is simply an entry attached to an in core inode which allows an
* Inode specific fields in an fsnotify_mark
*/
struct fsnotify_inode_mark {
struct inode *inode; /* inode this mark is associated with */
struct hlist_node i_list; /* list of marks by inode->i_fsnotify_marks */
struct list_head free_i_list; /* tmp list used when freeing this mark */
};
/*
* Mount point specific fields in an fsnotify_mark
*/
struct fsnotify_vfsmount_mark {
struct vfsmount *mnt; /* vfsmount this mark is associated with */
struct hlist_node m_list; /* list of marks by inode->i_fsnotify_marks */
struct list_head free_m_list; /* tmp list used when freeing this mark */
};
/*
* a mark is simply an object attached to an in core inode which allows an
* fsnotify listener to indicate they are either no longer interested in events
* of a type matching mask or only interested in those events.
*
@ -232,19 +262,28 @@ struct fsnotify_event {
* (such as dnotify) will flush these when the open fd is closed and not at
* inode eviction or modification.
*/
struct fsnotify_mark_entry {
__u32 mask; /* mask this mark entry is for */
struct fsnotify_mark {
__u32 mask; /* mask this mark is for */
/* we hold ref for each i_list and g_list. also one ref for each 'thing'
* in kernel that found and may be using this mark. */
atomic_t refcnt; /* active things looking at this mark */
struct inode *inode; /* inode this entry is associated with */
struct fsnotify_group *group; /* group this mark entry is for */
struct hlist_node i_list; /* list of mark_entries by inode->i_fsnotify_mark_entries */
struct list_head g_list; /* list of mark_entries by group->i_fsnotify_mark_entries */
spinlock_t lock; /* protect group, inode, and killme */
struct list_head free_i_list; /* tmp list used when freeing this mark */
struct fsnotify_group *group; /* group this mark is for */
struct list_head g_list; /* list of marks by group->i_fsnotify_marks */
spinlock_t lock; /* protect group and inode */
union {
struct fsnotify_inode_mark i;
struct fsnotify_vfsmount_mark m;
};
__u32 ignored_mask; /* events types to ignore */
struct list_head free_g_list; /* tmp list used when freeing this mark */
void (*free_mark)(struct fsnotify_mark_entry *entry); /* called on final put+free */
#define FSNOTIFY_MARK_FLAG_INODE 0x01
#define FSNOTIFY_MARK_FLAG_VFSMOUNT 0x02
#define FSNOTIFY_MARK_FLAG_OBJECT_PINNED 0x04
#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x08
#define FSNOTIFY_MARK_FLAG_ALIVE 0x10
unsigned int flags; /* vfsmount or inode mark? */
struct list_head destroy_list;
void (*free_mark)(struct fsnotify_mark *mark); /* called on final put+free */
};
#ifdef CONFIG_FSNOTIFY
@ -252,10 +291,11 @@ struct fsnotify_mark_entry {
/* called from the vfs helpers */
/* main fsnotify call to send events */
extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
const char *name, u32 cookie);
extern void __fsnotify_parent(struct dentry *dentry, __u32 mask);
extern int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
const unsigned char *name, u32 cookie);
extern void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask);
extern void __fsnotify_inode_delete(struct inode *inode);
extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt);
extern u32 fsnotify_get_cookie(void);
static inline int fsnotify_inode_watches_children(struct inode *inode)
@ -304,15 +344,9 @@ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode
/* called from fsnotify listeners, such as fanotify or dnotify */
/* must call when a group changes its ->mask */
extern void fsnotify_recalc_global_mask(void);
/* get a reference to an existing or create a new group */
extern struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num,
__u32 mask,
const struct fsnotify_ops *ops);
/* run all marks associated with this group and update group->mask */
extern void fsnotify_recalc_group_mask(struct fsnotify_group *group);
/* drop reference on a group from fsnotify_obtain_group */
extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops);
/* drop reference on a group from fsnotify_alloc_group */
extern void fsnotify_put_group(struct fsnotify_group *group);
/* take a reference to an event */
@ -323,8 +357,11 @@ extern struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struc
struct fsnotify_event *event);
/* attach the event to the group notification queue */
extern int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
struct fsnotify_event_private_data *priv);
extern struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group,
struct fsnotify_event *event,
struct fsnotify_event_private_data *priv,
struct fsnotify_event *(*merge)(struct list_head *,
struct fsnotify_event *));
/* true if the group notification queue is empty */
extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
/* return, but do not dequeue the first event on the notification queue */
@ -334,38 +371,66 @@ extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group
/* functions used to manipulate the marks attached to inodes */
/* run all marks associated with a vfsmount and update mnt->mnt_fsnotify_mask */
extern void fsnotify_recalc_vfsmount_mask(struct vfsmount *mnt);
/* run all marks associated with an inode and update inode->i_fsnotify_mask */
extern void fsnotify_recalc_inode_mask(struct inode *inode);
extern void fsnotify_init_mark(struct fsnotify_mark_entry *entry, void (*free_mark)(struct fsnotify_mark_entry *entry));
extern void fsnotify_init_mark(struct fsnotify_mark *mark, void (*free_mark)(struct fsnotify_mark *mark));
/* find (and take a reference) to a mark associated with group and inode */
extern struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group, struct inode *inode);
extern struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group, struct inode *inode);
/* find (and take a reference) to a mark associated with group and vfsmount */
extern struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group, struct vfsmount *mnt);
/* copy the values from old into new */
extern void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old);
/* set the ignored_mask of a mark */
extern void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask);
/* set the mask of a mark (might pin the object into memory */
extern void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask);
/* attach the mark to both the group and the inode */
extern int fsnotify_add_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group, struct inode *inode);
extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
struct inode *inode, struct vfsmount *mnt, int allow_dups);
/* given a mark, flag it to be freed when all references are dropped */
extern void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry);
extern void fsnotify_destroy_mark(struct fsnotify_mark *mark);
/* run all the marks in a group, and clear all of the vfsmount marks */
extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group);
/* run all the marks in a group, and clear all of the inode marks */
extern void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group);
/* run all the marks in a group, and clear all of the marks where mark->flags & flags is true*/
extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags);
/* run all the marks in a group, and flag them to be freed */
extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group);
extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry);
extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry);
extern void fsnotify_get_mark(struct fsnotify_mark *mark);
extern void fsnotify_put_mark(struct fsnotify_mark *mark);
extern void fsnotify_unmount_inodes(struct list_head *list);
/* put here because inotify does some weird stuff when destroying watches */
extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
void *data, int data_is, const char *name,
void *data, int data_is,
const unsigned char *name,
u32 cookie, gfp_t gfp);
/* fanotify likes to change events after they are on lists... */
extern struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event);
extern int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
struct fsnotify_event *new_event);
#else
static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
const char *name, u32 cookie)
{}
static inline int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
const unsigned char *name, u32 cookie)
{
return 0;
}
static inline void __fsnotify_parent(struct dentry *dentry, __u32 mask)
static inline void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask)
{}
static inline void __fsnotify_inode_delete(struct inode *inode)
{}
static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
{}
static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
{}

View file

@ -51,6 +51,7 @@ struct inotify_event {
/* special flags */
#define IN_ONLYDIR 0x01000000 /* only watch the path if it is a directory */
#define IN_DONT_FOLLOW 0x02000000 /* don't follow a sym link */
#define IN_EXCL_UNLINK 0x04000000 /* exclude events on unlinked objects */
#define IN_MASK_ADD 0x20000000 /* add to the mask of an already existing watch */
#define IN_ISDIR 0x40000000 /* event occurred against dir */
#define IN_ONESHOT 0x80000000 /* only send event once */
@ -70,177 +71,17 @@ struct inotify_event {
#define IN_NONBLOCK O_NONBLOCK
#ifdef __KERNEL__
#include <linux/sysctl.h>
extern struct ctl_table inotify_table[]; /* for sysctl */
#include <linux/dcache.h>
#include <linux/fs.h>
#define ALL_INOTIFY_BITS (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \
IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | \
IN_MOVED_TO | IN_CREATE | IN_DELETE | \
IN_DELETE_SELF | IN_MOVE_SELF | IN_UNMOUNT | \
IN_Q_OVERFLOW | IN_IGNORED | IN_ONLYDIR | \
IN_DONT_FOLLOW | IN_EXCL_UNLINK | IN_MASK_ADD | \
IN_ISDIR | IN_ONESHOT)
/*
* struct inotify_watch - represents a watch request on a specific inode
*
* h_list is protected by ih->mutex of the associated inotify_handle.
* i_list, mask are protected by inode->inotify_mutex of the associated inode.
* ih, inode, and wd are never written to once the watch is created.
*
* Callers must use the established inotify interfaces to access inotify_watch
* contents. The content of this structure is private to the inotify
* implementation.
*/
struct inotify_watch {
struct list_head h_list; /* entry in inotify_handle's list */
struct list_head i_list; /* entry in inode's list */
atomic_t count; /* reference count */
struct inotify_handle *ih; /* associated inotify handle */
struct inode *inode; /* associated inode */
__s32 wd; /* watch descriptor */
__u32 mask; /* event mask for this watch */
};
struct inotify_operations {
void (*handle_event)(struct inotify_watch *, u32, u32, u32,
const char *, struct inode *);
void (*destroy_watch)(struct inotify_watch *);
};
#ifdef CONFIG_INOTIFY
/* Kernel API for producing events */
extern void inotify_d_instantiate(struct dentry *, struct inode *);
extern void inotify_d_move(struct dentry *);
extern void inotify_inode_queue_event(struct inode *, __u32, __u32,
const char *, struct inode *);
extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32,
const char *);
extern void inotify_unmount_inodes(struct list_head *);
extern void inotify_inode_is_dead(struct inode *);
extern u32 inotify_get_cookie(void);
/* Kernel Consumer API */
extern struct inotify_handle *inotify_init(const struct inotify_operations *);
extern void inotify_init_watch(struct inotify_watch *);
extern void inotify_destroy(struct inotify_handle *);
extern __s32 inotify_find_watch(struct inotify_handle *, struct inode *,
struct inotify_watch **);
extern __s32 inotify_find_update_watch(struct inotify_handle *, struct inode *,
u32);
extern __s32 inotify_add_watch(struct inotify_handle *, struct inotify_watch *,
struct inode *, __u32);
extern __s32 inotify_clone_watch(struct inotify_watch *, struct inotify_watch *);
extern void inotify_evict_watch(struct inotify_watch *);
extern int inotify_rm_watch(struct inotify_handle *, struct inotify_watch *);
extern int inotify_rm_wd(struct inotify_handle *, __u32);
extern void inotify_remove_watch_locked(struct inotify_handle *,
struct inotify_watch *);
extern void get_inotify_watch(struct inotify_watch *);
extern void put_inotify_watch(struct inotify_watch *);
extern int pin_inotify_watch(struct inotify_watch *);
extern void unpin_inotify_watch(struct inotify_watch *);
#else
static inline void inotify_d_instantiate(struct dentry *dentry,
struct inode *inode)
{
}
static inline void inotify_d_move(struct dentry *dentry)
{
}
static inline void inotify_inode_queue_event(struct inode *inode,
__u32 mask, __u32 cookie,
const char *filename,
struct inode *n_inode)
{
}
static inline void inotify_dentry_parent_queue_event(struct dentry *dentry,
__u32 mask, __u32 cookie,
const char *filename)
{
}
static inline void inotify_unmount_inodes(struct list_head *list)
{
}
static inline void inotify_inode_is_dead(struct inode *inode)
{
}
static inline u32 inotify_get_cookie(void)
{
return 0;
}
static inline struct inotify_handle *inotify_init(const struct inotify_operations *ops)
{
return ERR_PTR(-EOPNOTSUPP);
}
static inline void inotify_init_watch(struct inotify_watch *watch)
{
}
static inline void inotify_destroy(struct inotify_handle *ih)
{
}
static inline __s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
struct inotify_watch **watchp)
{
return -EOPNOTSUPP;
}
static inline __s32 inotify_find_update_watch(struct inotify_handle *ih,
struct inode *inode, u32 mask)
{
return -EOPNOTSUPP;
}
static inline __s32 inotify_add_watch(struct inotify_handle *ih,
struct inotify_watch *watch,
struct inode *inode, __u32 mask)
{
return -EOPNOTSUPP;
}
static inline int inotify_rm_watch(struct inotify_handle *ih,
struct inotify_watch *watch)
{
return -EOPNOTSUPP;
}
static inline int inotify_rm_wd(struct inotify_handle *ih, __u32 wd)
{
return -EOPNOTSUPP;
}
static inline void inotify_remove_watch_locked(struct inotify_handle *ih,
struct inotify_watch *watch)
{
}
static inline void get_inotify_watch(struct inotify_watch *watch)
{
}
static inline void put_inotify_watch(struct inotify_watch *watch)
{
}
extern inline int pin_inotify_watch(struct inotify_watch *watch)
{
return 0;
}
extern inline void unpin_inotify_watch(struct inotify_watch *watch)
{
}
#endif /* CONFIG_INOTIFY */
#endif /* __KERNEL __ */
#endif
#endif /* _LINUX_INOTIFY_H */

View file

@ -56,7 +56,11 @@ struct vfsmount {
struct list_head mnt_mounts; /* list of children, anchored here */
struct list_head mnt_child; /* and going through their mnt_child */
int mnt_flags;
/* 4 bytes hole on 64bits arches */
/* 4 bytes hole on 64bits arches without fsnotify */
#ifdef CONFIG_FSNOTIFY
__u32 mnt_fsnotify_mask;
struct hlist_head mnt_fsnotify_marks;
#endif
const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
struct list_head mnt_list;
struct list_head mnt_expire; /* link in fs-specific expiry list */

View file

@ -23,6 +23,7 @@
#define __LINUX_SECURITY_H
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/binfmts.h>
#include <linux/signal.h>
#include <linux/resource.h>

View file

@ -811,6 +811,10 @@ asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *,
asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int,
struct timespec __user *, const sigset_t __user *,
size_t);
asmlinkage long sys_fanotify_init(unsigned int flags, unsigned int event_f_flags);
asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags,
u64 mask, int fd,
const char __user *pathname);
int kernel_execve(const char *filename, char *const argv[], char *const envp[]);

View file

@ -320,13 +320,17 @@ config AUDITSYSCALL
help
Enable low-overhead system-call auditing infrastructure that
can be used independently or with another kernel subsystem,
such as SELinux. To use audit's filesystem watch feature, please
ensure that INOTIFY is configured.
such as SELinux.
config AUDIT_WATCH
def_bool y
depends on AUDITSYSCALL
select FSNOTIFY
config AUDIT_TREE
def_bool y
depends on AUDITSYSCALL
select INOTIFY
select FSNOTIFY
menu "RCU Subsystem"

View file

@ -70,10 +70,11 @@ obj-$(CONFIG_IKCONFIG) += configs.o
obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
obj-$(CONFIG_SMP) += stop_machine.o
obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
obj-$(CONFIG_AUDIT) += audit.o auditfilter.o audit_watch.o
obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
obj-$(CONFIG_GCOV_KERNEL) += gcov/
obj-$(CONFIG_AUDIT_WATCH) += audit_watch.o
obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
obj-$(CONFIG_GCOV_KERNEL) += gcov/
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KGDB) += debug/
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o

View file

@ -56,7 +56,6 @@
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/inotify.h>
#include <linux/freezer.h>
#include <linux/tty.h>

View file

@ -103,21 +103,27 @@ extern struct mutex audit_filter_mutex;
extern void audit_free_rule_rcu(struct rcu_head *);
extern struct list_head audit_filter_list[];
extern struct audit_entry *audit_dupe_rule(struct audit_krule *old);
/* audit watch functions */
extern unsigned long audit_watch_inode(struct audit_watch *watch);
extern dev_t audit_watch_dev(struct audit_watch *watch);
#ifdef CONFIG_AUDIT_WATCH
extern void audit_put_watch(struct audit_watch *watch);
extern void audit_get_watch(struct audit_watch *watch);
extern int audit_to_watch(struct audit_krule *krule, char *path, int len, u32 op);
extern int audit_add_watch(struct audit_krule *krule);
extern void audit_remove_watch(struct audit_watch *watch);
extern void audit_remove_watch_rule(struct audit_krule *krule, struct list_head *list);
extern void audit_inotify_unregister(struct list_head *in_list);
extern int audit_add_watch(struct audit_krule *krule, struct list_head **list);
extern void audit_remove_watch_rule(struct audit_krule *krule);
extern char *audit_watch_path(struct audit_watch *watch);
extern struct list_head *audit_watch_rules(struct audit_watch *watch);
extern int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev);
#else
#define audit_put_watch(w) {}
#define audit_get_watch(w) {}
#define audit_to_watch(k, p, l, o) (-EINVAL)
#define audit_add_watch(k, l) (-EINVAL)
#define audit_remove_watch_rule(k) BUG()
#define audit_watch_path(w) ""
#define audit_watch_compare(w, i, d) 0
extern struct audit_entry *audit_dupe_rule(struct audit_krule *old,
struct audit_watch *watch);
#endif /* CONFIG_AUDIT_WATCH */
#ifdef CONFIG_AUDIT_TREE
extern struct audit_chunk *audit_tree_lookup(const struct inode *);

View file

@ -1,5 +1,5 @@
#include "audit.h"
#include <linux/inotify.h>
#include <linux/fsnotify_backend.h>
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/kthread.h>
@ -22,7 +22,7 @@ struct audit_tree {
struct audit_chunk {
struct list_head hash;
struct inotify_watch watch;
struct fsnotify_mark mark;
struct list_head trees; /* with root here */
int dead;
int count;
@ -59,7 +59,7 @@ static LIST_HEAD(prune_list);
* tree is refcounted; one reference for "some rules on rules_list refer to
* it", one for each chunk with pointer to it.
*
* chunk is refcounted by embedded inotify_watch + .refs (non-zero refcount
* chunk is refcounted by embedded fsnotify_mark + .refs (non-zero refcount
* of watch contributes 1 to .refs).
*
* node.index allows to get from node.list to containing chunk.
@ -68,7 +68,7 @@ static LIST_HEAD(prune_list);
* that makes a difference. Some.
*/
static struct inotify_handle *rtree_ih;
static struct fsnotify_group *audit_tree_group;
static struct audit_tree *alloc_tree(const char *s)
{
@ -111,29 +111,6 @@ const char *audit_tree_path(struct audit_tree *tree)
return tree->pathname;
}
static struct audit_chunk *alloc_chunk(int count)
{
struct audit_chunk *chunk;
size_t size;
int i;
size = offsetof(struct audit_chunk, owners) + count * sizeof(struct node);
chunk = kzalloc(size, GFP_KERNEL);
if (!chunk)
return NULL;
INIT_LIST_HEAD(&chunk->hash);
INIT_LIST_HEAD(&chunk->trees);
chunk->count = count;
atomic_long_set(&chunk->refs, 1);
for (i = 0; i < count; i++) {
INIT_LIST_HEAD(&chunk->owners[i].list);
chunk->owners[i].index = i;
}
inotify_init_watch(&chunk->watch);
return chunk;
}
static void free_chunk(struct audit_chunk *chunk)
{
int i;
@ -157,6 +134,35 @@ static void __put_chunk(struct rcu_head *rcu)
audit_put_chunk(chunk);
}
static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
{
struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
call_rcu(&chunk->head, __put_chunk);
}
static struct audit_chunk *alloc_chunk(int count)
{
struct audit_chunk *chunk;
size_t size;
int i;
size = offsetof(struct audit_chunk, owners) + count * sizeof(struct node);
chunk = kzalloc(size, GFP_KERNEL);
if (!chunk)
return NULL;
INIT_LIST_HEAD(&chunk->hash);
INIT_LIST_HEAD(&chunk->trees);
chunk->count = count;
atomic_long_set(&chunk->refs, 1);
for (i = 0; i < count; i++) {
INIT_LIST_HEAD(&chunk->owners[i].list);
chunk->owners[i].index = i;
}
fsnotify_init_mark(&chunk->mark, audit_tree_destroy_watch);
return chunk;
}
enum {HASH_SIZE = 128};
static struct list_head chunk_hash_heads[HASH_SIZE];
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(hash_lock);
@ -167,10 +173,15 @@ static inline struct list_head *chunk_hash(const struct inode *inode)
return chunk_hash_heads + n % HASH_SIZE;
}
/* hash_lock is held by caller */
/* hash_lock & entry->lock is held by caller */
static void insert_hash(struct audit_chunk *chunk)
{
struct list_head *list = chunk_hash(chunk->watch.inode);
struct fsnotify_mark *entry = &chunk->mark;
struct list_head *list;
if (!entry->i.inode)
return;
list = chunk_hash(entry->i.inode);
list_add_rcu(&chunk->hash, list);
}
@ -181,7 +192,8 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode)
struct audit_chunk *p;
list_for_each_entry_rcu(p, list, hash) {
if (p->watch.inode == inode) {
/* mark.inode may have gone NULL, but who cares? */
if (p->mark.i.inode == inode) {
atomic_long_inc(&p->refs);
return p;
}
@ -210,38 +222,19 @@ static struct audit_chunk *find_chunk(struct node *p)
static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
struct fsnotify_mark *entry = &chunk->mark;
struct audit_chunk *new;
struct audit_tree *owner;
int size = chunk->count - 1;
int i, j;
if (!pin_inotify_watch(&chunk->watch)) {
/*
* Filesystem is shutting down; all watches are getting
* evicted, just take it off the node list for this
* tree and let the eviction logics take care of the
* rest.
*/
owner = p->owner;
if (owner->root == chunk) {
list_del_init(&owner->same_root);
owner->root = NULL;
}
list_del_init(&p->list);
p->owner = NULL;
put_tree(owner);
return;
}
fsnotify_get_mark(entry);
spin_unlock(&hash_lock);
/*
* pin_inotify_watch() succeeded, so the watch won't go away
* from under us.
*/
mutex_lock(&chunk->watch.inode->inotify_mutex);
if (chunk->dead) {
mutex_unlock(&chunk->watch.inode->inotify_mutex);
spin_lock(&entry->lock);
if (chunk->dead || !entry->i.inode) {
spin_unlock(&entry->lock);
goto out;
}
@ -256,16 +249,17 @@ static void untag_chunk(struct node *p)
list_del_init(&p->list);
list_del_rcu(&chunk->hash);
spin_unlock(&hash_lock);
inotify_evict_watch(&chunk->watch);
mutex_unlock(&chunk->watch.inode->inotify_mutex);
put_inotify_watch(&chunk->watch);
spin_unlock(&entry->lock);
fsnotify_destroy_mark(entry);
fsnotify_put_mark(entry);
goto out;
}
new = alloc_chunk(size);
if (!new)
goto Fallback;
if (inotify_clone_watch(&chunk->watch, &new->watch) < 0) {
fsnotify_duplicate_mark(&new->mark, entry);
if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) {
free_chunk(new);
goto Fallback;
}
@ -298,9 +292,9 @@ static void untag_chunk(struct node *p)
list_for_each_entry(owner, &new->trees, same_root)
owner->root = new;
spin_unlock(&hash_lock);
inotify_evict_watch(&chunk->watch);
mutex_unlock(&chunk->watch.inode->inotify_mutex);
put_inotify_watch(&chunk->watch);
spin_unlock(&entry->lock);
fsnotify_destroy_mark(entry);
fsnotify_put_mark(entry);
goto out;
Fallback:
@ -314,31 +308,33 @@ static void untag_chunk(struct node *p)
p->owner = NULL;
put_tree(owner);
spin_unlock(&hash_lock);
mutex_unlock(&chunk->watch.inode->inotify_mutex);
spin_unlock(&entry->lock);
out:
unpin_inotify_watch(&chunk->watch);
fsnotify_put_mark(entry);
spin_lock(&hash_lock);
}
static int create_chunk(struct inode *inode, struct audit_tree *tree)
{
struct fsnotify_mark *entry;
struct audit_chunk *chunk = alloc_chunk(1);
if (!chunk)
return -ENOMEM;
if (inotify_add_watch(rtree_ih, &chunk->watch, inode, IN_IGNORED | IN_DELETE_SELF) < 0) {
entry = &chunk->mark;
if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) {
free_chunk(chunk);
return -ENOSPC;
}
mutex_lock(&inode->inotify_mutex);
spin_lock(&entry->lock);
spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
inotify_evict_watch(&chunk->watch);
mutex_unlock(&inode->inotify_mutex);
put_inotify_watch(&chunk->watch);
spin_unlock(&entry->lock);
fsnotify_destroy_mark(entry);
fsnotify_put_mark(entry);
return 0;
}
chunk->owners[0].index = (1U << 31);
@ -351,30 +347,31 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
}
insert_hash(chunk);
spin_unlock(&hash_lock);
mutex_unlock(&inode->inotify_mutex);
spin_unlock(&entry->lock);
return 0;
}
/* the first tagged inode becomes root of tree */
static int tag_chunk(struct inode *inode, struct audit_tree *tree)
{
struct inotify_watch *watch;
struct fsnotify_mark *old_entry, *chunk_entry;
struct audit_tree *owner;
struct audit_chunk *chunk, *old;
struct node *p;
int n;
if (inotify_find_watch(rtree_ih, inode, &watch) < 0)
old_entry = fsnotify_find_inode_mark(audit_tree_group, inode);
if (!old_entry)
return create_chunk(inode, tree);
old = container_of(watch, struct audit_chunk, watch);
old = container_of(old_entry, struct audit_chunk, mark);
/* are we already there? */
spin_lock(&hash_lock);
for (n = 0; n < old->count; n++) {
if (old->owners[n].owner == tree) {
spin_unlock(&hash_lock);
put_inotify_watch(&old->watch);
fsnotify_put_mark(old_entry);
return 0;
}
}
@ -382,25 +379,44 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
chunk = alloc_chunk(old->count + 1);
if (!chunk) {
put_inotify_watch(&old->watch);
fsnotify_put_mark(old_entry);
return -ENOMEM;
}
mutex_lock(&inode->inotify_mutex);
if (inotify_clone_watch(&old->watch, &chunk->watch) < 0) {
mutex_unlock(&inode->inotify_mutex);
put_inotify_watch(&old->watch);
chunk_entry = &chunk->mark;
spin_lock(&old_entry->lock);
if (!old_entry->i.inode) {
/* old_entry is being shot, lets just lie */
spin_unlock(&old_entry->lock);
fsnotify_put_mark(old_entry);
free_chunk(chunk);
return -ENOENT;
}
fsnotify_duplicate_mark(chunk_entry, old_entry);
if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->i.inode, NULL, 1)) {
spin_unlock(&old_entry->lock);
free_chunk(chunk);
fsnotify_put_mark(old_entry);
return -ENOSPC;
}
/* even though we hold old_entry->lock, this is safe since chunk_entry->lock could NEVER have been grabbed before */
spin_lock(&chunk_entry->lock);
spin_lock(&hash_lock);
/* we now hold old_entry->lock, chunk_entry->lock, and hash_lock */
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
inotify_evict_watch(&chunk->watch);
mutex_unlock(&inode->inotify_mutex);
put_inotify_watch(&old->watch);
put_inotify_watch(&chunk->watch);
spin_unlock(&chunk_entry->lock);
spin_unlock(&old_entry->lock);
fsnotify_destroy_mark(chunk_entry);
fsnotify_put_mark(chunk_entry);
fsnotify_put_mark(old_entry);
return 0;
}
list_replace_init(&old->trees, &chunk->trees);
@ -426,10 +442,11 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
list_add(&tree->same_root, &chunk->trees);
}
spin_unlock(&hash_lock);
inotify_evict_watch(&old->watch);
mutex_unlock(&inode->inotify_mutex);
put_inotify_watch(&old->watch); /* pair to inotify_find_watch */
put_inotify_watch(&old->watch); /* and kill it */
spin_unlock(&chunk_entry->lock);
spin_unlock(&old_entry->lock);
fsnotify_destroy_mark(old_entry);
fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
fsnotify_put_mark(old_entry); /* and kill it */
return 0;
}
@ -584,7 +601,9 @@ void audit_trim_trees(void)
spin_lock(&hash_lock);
list_for_each_entry(node, &tree->chunks, list) {
struct inode *inode = find_chunk(node)->watch.inode;
struct audit_chunk *chunk = find_chunk(node);
/* this could be NULL if the watch is dieing else where... */
struct inode *inode = chunk->mark.i.inode;
node->index |= 1U<<31;
if (iterate_mounts(compare_root, inode, root_mnt))
node->index &= ~(1U<<31);
@ -846,7 +865,6 @@ void audit_kill_trees(struct list_head *list)
* Here comes the stuff asynchronous to auditctl operations
*/
/* inode->inotify_mutex is locked */
static void evict_chunk(struct audit_chunk *chunk)
{
struct audit_tree *owner;
@ -885,35 +903,46 @@ static void evict_chunk(struct audit_chunk *chunk)
mutex_unlock(&audit_filter_mutex);
}
static void handle_event(struct inotify_watch *watch, u32 wd, u32 mask,
u32 cookie, const char *dname, struct inode *inode)
static int audit_tree_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmonut_mark,
struct fsnotify_event *event)
{
struct audit_chunk *chunk = container_of(watch, struct audit_chunk, watch);
if (mask & IN_IGNORED) {
evict_chunk(chunk);
put_inotify_watch(watch);
}
BUG();
return -EOPNOTSUPP;
}
static void destroy_watch(struct inotify_watch *watch)
static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
{
struct audit_chunk *chunk = container_of(watch, struct audit_chunk, watch);
call_rcu(&chunk->head, __put_chunk);
struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
evict_chunk(chunk);
fsnotify_put_mark(entry);
}
static const struct inotify_operations rtree_inotify_ops = {
.handle_event = handle_event,
.destroy_watch = destroy_watch,
static bool audit_tree_send_event(struct fsnotify_group *group, struct inode *inode,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
__u32 mask, void *data, int data_type)
{
return false;
}
static const struct fsnotify_ops audit_tree_ops = {
.handle_event = audit_tree_handle_event,
.should_send_event = audit_tree_send_event,
.free_group_priv = NULL,
.free_event_priv = NULL,
.freeing_mark = audit_tree_freeing_mark,
};
static int __init audit_tree_init(void)
{
int i;
rtree_ih = inotify_init(&rtree_inotify_ops);
if (IS_ERR(rtree_ih))
audit_panic("cannot initialize inotify handle for rectree watches");
audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
if (IS_ERR(audit_tree_group))
audit_panic("cannot initialize fsnotify group for rectree watches");
for (i = 0; i < HASH_SIZE; i++)
INIT_LIST_HEAD(&chunk_hash_heads[i]);

View file

@ -24,18 +24,18 @@
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <linux/fsnotify_backend.h>
#include <linux/namei.h>
#include <linux/netlink.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/inotify.h>
#include <linux/security.h>
#include "audit.h"
/*
* Reference counting:
*
* audit_parent: lifetime is from audit_init_parent() to receipt of an IN_IGNORED
* audit_parent: lifetime is from audit_init_parent() to receipt of an FS_IGNORED
* event. Each audit_watch holds a reference to its associated parent.
*
* audit_watch: if added to lists, lifetime is from audit_init_watch() to
@ -51,40 +51,61 @@ struct audit_watch {
unsigned long ino; /* associated inode number */
struct audit_parent *parent; /* associated parent */
struct list_head wlist; /* entry in parent->watches list */
struct list_head rules; /* associated rules */
struct list_head rules; /* anchor for krule->rlist */
};
struct audit_parent {
struct list_head ilist; /* entry in inotify registration list */
struct list_head watches; /* associated watches */
struct inotify_watch wdata; /* inotify watch data */
unsigned flags; /* status flags */
struct list_head watches; /* anchor for audit_watch->wlist */
struct fsnotify_mark mark; /* fsnotify mark on the inode */
};
/* Inotify handle. */
struct inotify_handle *audit_ih;
/* fsnotify handle. */
struct fsnotify_group *audit_watch_group;
/*
* audit_parent status flags:
*
* AUDIT_PARENT_INVALID - set anytime rules/watches are auto-removed due to
* a filesystem event to ensure we're adding audit watches to a valid parent.
* Technically not needed for IN_DELETE_SELF or IN_UNMOUNT events, as we cannot
* receive them while we have nameidata, but must be used for IN_MOVE_SELF which
* we can receive while holding nameidata.
*/
#define AUDIT_PARENT_INVALID 0x001
/* fsnotify events we care about. */
#define AUDIT_FS_WATCH (FS_MOVE | FS_CREATE | FS_DELETE | FS_DELETE_SELF |\
FS_MOVE_SELF | FS_EVENT_ON_CHILD)
/* Inotify events we care about. */
#define AUDIT_IN_WATCH IN_MOVE|IN_CREATE|IN_DELETE|IN_DELETE_SELF|IN_MOVE_SELF
static void audit_free_parent(struct audit_parent *parent)
{
WARN_ON(!list_empty(&parent->watches));
kfree(parent);
}
static void audit_free_parent(struct inotify_watch *i_watch)
static void audit_watch_free_mark(struct fsnotify_mark *entry)
{
struct audit_parent *parent;
parent = container_of(i_watch, struct audit_parent, wdata);
WARN_ON(!list_empty(&parent->watches));
kfree(parent);
parent = container_of(entry, struct audit_parent, mark);
audit_free_parent(parent);
}
static void audit_get_parent(struct audit_parent *parent)
{
if (likely(parent))
fsnotify_get_mark(&parent->mark);
}
static void audit_put_parent(struct audit_parent *parent)
{
if (likely(parent))
fsnotify_put_mark(&parent->mark);
}
/*
* Find and return the audit_parent on the given inode. If found a reference
* is taken on this parent.
*/
static inline struct audit_parent *audit_find_parent(struct inode *inode)
{
struct audit_parent *parent = NULL;
struct fsnotify_mark *entry;
entry = fsnotify_find_inode_mark(audit_watch_group, inode);
if (entry)
parent = container_of(entry, struct audit_parent, mark);
return parent;
}
void audit_get_watch(struct audit_watch *watch)
@ -105,7 +126,7 @@ void audit_put_watch(struct audit_watch *watch)
void audit_remove_watch(struct audit_watch *watch)
{
list_del(&watch->wlist);
put_inotify_watch(&watch->parent->wdata);
audit_put_parent(watch->parent);
watch->parent = NULL;
audit_put_watch(watch); /* match initial get */
}
@ -115,42 +136,32 @@ char *audit_watch_path(struct audit_watch *watch)
return watch->path;
}
struct list_head *audit_watch_rules(struct audit_watch *watch)
int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev)
{
return &watch->rules;
}
unsigned long audit_watch_inode(struct audit_watch *watch)
{
return watch->ino;
}
dev_t audit_watch_dev(struct audit_watch *watch)
{
return watch->dev;
return (watch->ino != (unsigned long)-1) &&
(watch->ino == ino) &&
(watch->dev == dev);
}
/* Initialize a parent watch entry. */
static struct audit_parent *audit_init_parent(struct nameidata *ndp)
{
struct inode *inode = ndp->path.dentry->d_inode;
struct audit_parent *parent;
s32 wd;
int ret;
parent = kzalloc(sizeof(*parent), GFP_KERNEL);
if (unlikely(!parent))
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&parent->watches);
parent->flags = 0;
inotify_init_watch(&parent->wdata);
/* grab a ref so inotify watch hangs around until we take audit_filter_mutex */
get_inotify_watch(&parent->wdata);
wd = inotify_add_watch(audit_ih, &parent->wdata,
ndp->path.dentry->d_inode, AUDIT_IN_WATCH);
if (wd < 0) {
audit_free_parent(&parent->wdata);
return ERR_PTR(wd);
fsnotify_init_mark(&parent->mark, audit_watch_free_mark);
parent->mark.mask = AUDIT_FS_WATCH;
ret = fsnotify_add_mark(&parent->mark, audit_watch_group, inode, NULL, 0);
if (ret < 0) {
audit_free_parent(parent);
return ERR_PTR(ret);
}
return parent;
@ -179,7 +190,7 @@ int audit_to_watch(struct audit_krule *krule, char *path, int len, u32 op)
{
struct audit_watch *watch;
if (!audit_ih)
if (!audit_watch_group)
return -EOPNOTSUPP;
if (path[0] != '/' || path[len-1] == '/' ||
@ -217,7 +228,7 @@ static struct audit_watch *audit_dupe_watch(struct audit_watch *old)
new->dev = old->dev;
new->ino = old->ino;
get_inotify_watch(&old->parent->wdata);
audit_get_parent(old->parent);
new->parent = old->parent;
out:
@ -251,15 +262,19 @@ static void audit_update_watch(struct audit_parent *parent,
struct audit_entry *oentry, *nentry;
mutex_lock(&audit_filter_mutex);
/* Run all of the watches on this parent looking for the one that
* matches the given dname */
list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
if (audit_compare_dname_path(dname, owatch->path, NULL))
continue;
/* If the update involves invalidating rules, do the inode-based
* filtering now, so we don't omit records. */
if (invalidating && current->audit_context)
if (invalidating && !audit_dummy_context())
audit_filter_inodes(current, current->audit_context);
/* updating ino will likely change which audit_hash_list we
* are on so we need a new watch for the new list */
nwatch = audit_dupe_watch(owatch);
if (IS_ERR(nwatch)) {
mutex_unlock(&audit_filter_mutex);
@ -275,12 +290,21 @@ static void audit_update_watch(struct audit_parent *parent,
list_del(&oentry->rule.rlist);
list_del_rcu(&oentry->list);
nentry = audit_dupe_rule(&oentry->rule, nwatch);
nentry = audit_dupe_rule(&oentry->rule);
if (IS_ERR(nentry)) {
list_del(&oentry->rule.list);
audit_panic("error updating watch, removing");
} else {
int h = audit_hash_ino((u32)ino);
/*
* nentry->rule.watch == oentry->rule.watch so
* we must drop that reference and set it to our
* new watch.
*/
audit_put_watch(nentry->rule.watch);
audit_get_watch(nwatch);
nentry->rule.watch = nwatch;
list_add(&nentry->rule.rlist, &nwatch->rules);
list_add_rcu(&nentry->list, &audit_inode_hash[h]);
list_replace(&oentry->rule.list,
@ -312,7 +336,6 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
struct audit_entry *e;
mutex_lock(&audit_filter_mutex);
parent->flags |= AUDIT_PARENT_INVALID;
list_for_each_entry_safe(w, nextw, &parent->watches, wlist) {
list_for_each_entry_safe(r, nextr, &w->rules, rlist) {
e = container_of(r, struct audit_entry, rule);
@ -325,20 +348,8 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
audit_remove_watch(w);
}
mutex_unlock(&audit_filter_mutex);
}
/* Unregister inotify watches for parents on in_list.
* Generates an IN_IGNORED event. */
void audit_inotify_unregister(struct list_head *in_list)
{
struct audit_parent *p, *n;
list_for_each_entry_safe(p, n, in_list, ilist) {
list_del(&p->ilist);
inotify_rm_watch(audit_ih, &p->wdata);
/* the unpin matching the pin in audit_do_del_rule() */
unpin_inotify_watch(&p->wdata);
}
fsnotify_destroy_mark(&parent->mark);
}
/* Get path information necessary for adding watches. */
@ -389,7 +400,7 @@ static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
}
}
/* Associate the given rule with an existing parent inotify_watch.
/* Associate the given rule with an existing parent.
* Caller must hold audit_filter_mutex. */
static void audit_add_to_parent(struct audit_krule *krule,
struct audit_parent *parent)
@ -397,6 +408,8 @@ static void audit_add_to_parent(struct audit_krule *krule,
struct audit_watch *w, *watch = krule->watch;
int watch_found = 0;
BUG_ON(!mutex_is_locked(&audit_filter_mutex));
list_for_each_entry(w, &parent->watches, wlist) {
if (strcmp(watch->path, w->path))
continue;
@ -413,7 +426,7 @@ static void audit_add_to_parent(struct audit_krule *krule,
}
if (!watch_found) {
get_inotify_watch(&parent->wdata);
audit_get_parent(parent);
watch->parent = parent;
list_add(&watch->wlist, &parent->watches);
@ -423,13 +436,12 @@ static void audit_add_to_parent(struct audit_krule *krule,
/* Find a matching watch entry, or add this one.
* Caller must hold audit_filter_mutex. */
int audit_add_watch(struct audit_krule *krule)
int audit_add_watch(struct audit_krule *krule, struct list_head **list)
{
struct audit_watch *watch = krule->watch;
struct inotify_watch *i_watch;
struct audit_parent *parent;
struct nameidata *ndp = NULL, *ndw = NULL;
int ret = 0;
int h, ret = 0;
mutex_unlock(&audit_filter_mutex);
@ -441,47 +453,38 @@ int audit_add_watch(struct audit_krule *krule)
goto error;
}
mutex_lock(&audit_filter_mutex);
/* update watch filter fields */
if (ndw) {
watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev;
watch->ino = ndw->path.dentry->d_inode->i_ino;
}
/* The audit_filter_mutex must not be held during inotify calls because
* we hold it during inotify event callback processing. If an existing
* inotify watch is found, inotify_find_watch() grabs a reference before
* returning.
*/
if (inotify_find_watch(audit_ih, ndp->path.dentry->d_inode,
&i_watch) < 0) {
/* either find an old parent or attach a new one */
parent = audit_find_parent(ndp->path.dentry->d_inode);
if (!parent) {
parent = audit_init_parent(ndp);
if (IS_ERR(parent)) {
/* caller expects mutex locked */
mutex_lock(&audit_filter_mutex);
ret = PTR_ERR(parent);
goto error;
}
} else
parent = container_of(i_watch, struct audit_parent, wdata);
}
mutex_lock(&audit_filter_mutex);
audit_add_to_parent(krule, parent);
/* parent was moved before we took audit_filter_mutex */
if (parent->flags & AUDIT_PARENT_INVALID)
ret = -ENOENT;
else
audit_add_to_parent(krule, parent);
/* match get in audit_init_parent or inotify_find_watch */
put_inotify_watch(&parent->wdata);
/* match get in audit_find_parent or audit_init_parent */
audit_put_parent(parent);
h = audit_hash_ino((u32)watch->ino);
*list = &audit_inode_hash[h];
error:
audit_put_nd(ndp, ndw); /* NULL args OK */
return ret;
}
void audit_remove_watch_rule(struct audit_krule *krule, struct list_head *list)
void audit_remove_watch_rule(struct audit_krule *krule)
{
struct audit_watch *watch = krule->watch;
struct audit_parent *parent = watch->parent;
@ -492,53 +495,74 @@ void audit_remove_watch_rule(struct audit_krule *krule, struct list_head *list)
audit_remove_watch(watch);
if (list_empty(&parent->watches)) {
/* Put parent on the inotify un-registration
* list. Grab a reference before releasing
* audit_filter_mutex, to be released in
* audit_inotify_unregister().
* If filesystem is going away, just leave
* the sucker alone, eviction will take
* care of it. */
if (pin_inotify_watch(&parent->wdata))
list_add(&parent->ilist, list);
audit_get_parent(parent);
fsnotify_destroy_mark(&parent->mark);
audit_put_parent(parent);
}
}
}
/* Update watch data in audit rules based on inotify events. */
static void audit_handle_ievent(struct inotify_watch *i_watch, u32 wd, u32 mask,
u32 cookie, const char *dname, struct inode *inode)
static bool audit_watch_should_send_event(struct fsnotify_group *group, struct inode *inode,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
__u32 mask, void *data, int data_type)
{
struct audit_parent *parent;
parent = container_of(i_watch, struct audit_parent, wdata);
if (mask & (IN_CREATE|IN_MOVED_TO) && inode)
audit_update_watch(parent, dname, inode->i_sb->s_dev,
inode->i_ino, 0);
else if (mask & (IN_DELETE|IN_MOVED_FROM))
audit_update_watch(parent, dname, (dev_t)-1, (unsigned long)-1, 1);
/* inotify automatically removes the watch and sends IN_IGNORED */
else if (mask & (IN_DELETE_SELF|IN_UNMOUNT))
audit_remove_parent_watches(parent);
/* inotify does not remove the watch, so remove it manually */
else if(mask & IN_MOVE_SELF) {
audit_remove_parent_watches(parent);
inotify_remove_watch_locked(audit_ih, i_watch);
} else if (mask & IN_IGNORED)
put_inotify_watch(i_watch);
return true;
}
static const struct inotify_operations audit_inotify_ops = {
.handle_event = audit_handle_ievent,
.destroy_watch = audit_free_parent,
/* Update watch data in audit rules based on fsnotify events. */
static int audit_watch_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
struct fsnotify_event *event)
{
struct inode *inode;
__u32 mask = event->mask;
const char *dname = event->file_name;
struct audit_parent *parent;
parent = container_of(inode_mark, struct audit_parent, mark);
BUG_ON(group != audit_watch_group);
switch (event->data_type) {
case (FSNOTIFY_EVENT_FILE):
inode = event->file->f_path.dentry->d_inode;
break;
case (FSNOTIFY_EVENT_INODE):
inode = event->inode;
break;
default:
BUG();
inode = NULL;
break;
};
if (mask & (FS_CREATE|FS_MOVED_TO) && inode)
audit_update_watch(parent, dname, inode->i_sb->s_dev, inode->i_ino, 0);
else if (mask & (FS_DELETE|FS_MOVED_FROM))
audit_update_watch(parent, dname, (dev_t)-1, (unsigned long)-1, 1);
else if (mask & (FS_DELETE_SELF|FS_UNMOUNT|FS_MOVE_SELF))
audit_remove_parent_watches(parent);
return 0;
}
static const struct fsnotify_ops audit_watch_fsnotify_ops = {
.should_send_event = audit_watch_should_send_event,
.handle_event = audit_watch_handle_event,
.free_group_priv = NULL,
.freeing_mark = NULL,
.free_event_priv = NULL,
};
static int __init audit_watch_init(void)
{
audit_ih = inotify_init(&audit_inotify_ops);
if (IS_ERR(audit_ih))
audit_panic("cannot initialize inotify handle");
audit_watch_group = fsnotify_alloc_group(&audit_watch_fsnotify_ops);
if (IS_ERR(audit_watch_group)) {
audit_watch_group = NULL;
audit_panic("cannot create audit fsnotify group");
}
return 0;
}
subsys_initcall(audit_watch_init);
device_initcall(audit_watch_init);

View file

@ -71,6 +71,7 @@ static inline void audit_free_rule(struct audit_entry *e)
{
int i;
struct audit_krule *erule = &e->rule;
/* some rules don't have associated watches */
if (erule->watch)
audit_put_watch(erule->watch);
@ -746,8 +747,7 @@ static inline int audit_dupe_lsm_field(struct audit_field *df,
* rule with the new rule in the filterlist, then free the old rule.
* The rlist element is undefined; list manipulations are handled apart from
* the initial copy. */
struct audit_entry *audit_dupe_rule(struct audit_krule *old,
struct audit_watch *watch)
struct audit_entry *audit_dupe_rule(struct audit_krule *old)
{
u32 fcount = old->field_count;
struct audit_entry *entry;
@ -769,8 +769,8 @@ struct audit_entry *audit_dupe_rule(struct audit_krule *old,
new->prio = old->prio;
new->buflen = old->buflen;
new->inode_f = old->inode_f;
new->watch = NULL;
new->field_count = old->field_count;
/*
* note that we are OK with not refcounting here; audit_match_tree()
* never dereferences tree and we can't get false positives there
@ -811,9 +811,9 @@ struct audit_entry *audit_dupe_rule(struct audit_krule *old,
}
}
if (watch) {
audit_get_watch(watch);
new->watch = watch;
if (old->watch) {
audit_get_watch(old->watch);
new->watch = old->watch;
}
return entry;
@ -866,7 +866,7 @@ static inline int audit_add_rule(struct audit_entry *entry)
struct audit_watch *watch = entry->rule.watch;
struct audit_tree *tree = entry->rule.tree;
struct list_head *list;
int h, err;
int err;
#ifdef CONFIG_AUDITSYSCALL
int dont_count = 0;
@ -889,15 +889,11 @@ static inline int audit_add_rule(struct audit_entry *entry)
if (watch) {
/* audit_filter_mutex is dropped and re-taken during this call */
err = audit_add_watch(&entry->rule);
err = audit_add_watch(&entry->rule, &list);
if (err) {
mutex_unlock(&audit_filter_mutex);
goto error;
}
/* entry->rule.watch may have changed during audit_add_watch() */
watch = entry->rule.watch;
h = audit_hash_ino((u32)audit_watch_inode(watch));
list = &audit_inode_hash[h];
}
if (tree) {
err = audit_add_tree_rule(&entry->rule);
@ -949,7 +945,6 @@ static inline int audit_del_rule(struct audit_entry *entry)
struct audit_watch *watch = entry->rule.watch;
struct audit_tree *tree = entry->rule.tree;
struct list_head *list;
LIST_HEAD(inotify_list);
int ret = 0;
#ifdef CONFIG_AUDITSYSCALL
int dont_count = 0;
@ -969,7 +964,7 @@ static inline int audit_del_rule(struct audit_entry *entry)
}
if (e->rule.watch)
audit_remove_watch_rule(&e->rule, &inotify_list);
audit_remove_watch_rule(&e->rule);
if (e->rule.tree)
audit_remove_tree_rule(&e->rule);
@ -987,9 +982,6 @@ static inline int audit_del_rule(struct audit_entry *entry)
#endif
mutex_unlock(&audit_filter_mutex);
if (!list_empty(&inotify_list))
audit_inotify_unregister(&inotify_list);
out:
if (watch)
audit_put_watch(watch); /* match initial get */
@ -1323,30 +1315,23 @@ static int update_lsm_rule(struct audit_krule *r)
{
struct audit_entry *entry = container_of(r, struct audit_entry, rule);
struct audit_entry *nentry;
struct audit_watch *watch;
struct audit_tree *tree;
int err = 0;
if (!security_audit_rule_known(r))
return 0;
watch = r->watch;
tree = r->tree;
nentry = audit_dupe_rule(r, watch);
nentry = audit_dupe_rule(r);
if (IS_ERR(nentry)) {
/* save the first error encountered for the
* return value */
err = PTR_ERR(nentry);
audit_panic("error updating LSM filters");
if (watch)
if (r->watch)
list_del(&r->rlist);
list_del_rcu(&entry->list);
list_del(&r->list);
} else {
if (watch) {
list_add(&nentry->rule.rlist, audit_watch_rules(watch));
list_del(&r->rlist);
} else if (tree)
if (r->watch || r->tree)
list_replace_init(&r->rlist, &nentry->rule.rlist);
list_replace_rcu(&entry->list, &nentry->list);
list_replace(&r->list, &nentry->rule.list);

View file

@ -65,7 +65,6 @@
#include <linux/binfmts.h>
#include <linux/highmem.h>
#include <linux/syscalls.h>
#include <linux/inotify.h>
#include <linux/capability.h>
#include <linux/fs_struct.h>
@ -549,9 +548,8 @@ static int audit_filter_rules(struct task_struct *tsk,
}
break;
case AUDIT_WATCH:
if (name && audit_watch_inode(rule->watch) != (unsigned long)-1)
result = (name->dev == audit_watch_dev(rule->watch) &&
name->ino == audit_watch_inode(rule->watch));
if (name)
result = audit_watch_compare(rule->watch, name->ino, name->dev);
break;
case AUDIT_DIR:
if (ctx)
@ -1726,7 +1724,7 @@ static inline void handle_one(const struct inode *inode)
struct audit_tree_refs *p;
struct audit_chunk *chunk;
int count;
if (likely(list_empty(&inode->inotify_watches)))
if (likely(hlist_empty(&inode->i_fsnotify_marks)))
return;
context = current->audit_context;
p = context->trees;
@ -1769,7 +1767,7 @@ static void handle_path(const struct dentry *dentry)
seq = read_seqbegin(&rename_lock);
for(;;) {
struct inode *inode = d->d_inode;
if (inode && unlikely(!list_empty(&inode->inotify_watches))) {
if (inode && unlikely(!hlist_empty(&inode->i_fsnotify_marks))) {
struct audit_chunk *chunk;
chunk = audit_tree_lookup(inode);
if (chunk) {

View file

@ -181,3 +181,7 @@ cond_syscall(sys_eventfd2);
/* performance counters: */
cond_syscall(sys_perf_event_open);
/* fanotify! */
cond_syscall(sys_fanotify_init);
cond_syscall(sys_fanotify_mark);

View file

@ -44,6 +44,7 @@
#include <linux/times.h>
#include <linux/limits.h>
#include <linux/dcache.h>
#include <linux/dnotify.h>
#include <linux/syscalls.h>
#include <linux/vmstat.h>
#include <linux/nfs_fs.h>
@ -131,6 +132,9 @@ static int min_percpu_pagelist_fract = 8;
static int ngroups_max = NGROUPS_MAX;
#ifdef CONFIG_INOTIFY_USER
#include <linux/inotify.h>
#endif
#ifdef CONFIG_SPARC
#include <asm/system.h>
#endif
@ -207,9 +211,6 @@ static struct ctl_table fs_table[];
static struct ctl_table debug_table[];
static struct ctl_table dev_table[];
extern struct ctl_table random_table[];
#ifdef CONFIG_INOTIFY_USER
extern struct ctl_table inotify_table[];
#endif
#ifdef CONFIG_EPOLL
extern struct ctl_table epoll_table[];
#endif

View file

@ -619,7 +619,13 @@ void security_inode_getsecid(const struct inode *inode, u32 *secid)
int security_file_permission(struct file *file, int mask)
{
return security_ops->file_permission(file, mask);
int ret;
ret = security_ops->file_permission(file, mask);
if (ret)
return ret;
return fsnotify_perm(file, mask);
}
int security_file_alloc(struct file *file)
@ -683,7 +689,13 @@ int security_file_receive(struct file *file)
int security_dentry_open(struct file *file, const struct cred *cred)
{
return security_ops->dentry_open(file, cred);
int ret;
ret = security_ops->dentry_open(file, cred);
if (ret)
return ret;
return fsnotify_perm(file, MAY_OPEN);
}
int security_task_create(unsigned long clone_flags)