acct: new lifetime rules

Do not reuse bsd_acct_struct after closing the damn thing.
Structure lifetime is controlled by refcount now.  We also
have a mutex in there, held over closing and writing (the
file is O_APPEND, so we are not losing any concurrency).

As the result, we do not need to bother with get_file()/fput()
on log write anymore.  Moreover, do_acct_process() only needs
acct itself; file and pidns are picked from it.

Killed instances are distinguished by having NULL ->ns.
Refcount is protected by acct_lock; anybody taking the
mutex needs to grab a reference first.

The things will get a lot simpler in the next commits - this
is just the minimal chunk switching to the new lifetime rules.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
Al Viro 2014-08-07 07:51:03 -04:00
parent 9df7fa16ee
commit b8f00e6be4

View file

@ -75,15 +75,11 @@ int acct_parm[3] = {4, 2, 30};
/* /*
* External references and all of the globals. * External references and all of the globals.
*/ */
static void do_acct_process(struct bsd_acct_struct *acct, static void do_acct_process(struct bsd_acct_struct *acct);
struct pid_namespace *ns, struct file *);
/*
* This structure is used so that all the data protected by lock
* can be placed in the same cache line as the lock. This primes
* the cache line to have the data after getting the lock.
*/
struct bsd_acct_struct { struct bsd_acct_struct {
long count;
struct mutex lock;
int active; int active;
unsigned long needcheck; unsigned long needcheck;
struct file *file; struct file *file;
@ -157,39 +153,59 @@ static int check_free_space(struct bsd_acct_struct *acct, struct file *file)
return res; return res;
} }
/* static void acct_put(struct bsd_acct_struct *p)
* Close the old accounting file (if currently open) and then replace
* it with file (if non-NULL).
*
* NOTE: acct_lock MUST be held on entry and exit.
*/
static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
struct pid_namespace *ns)
{ {
struct file *old_acct = NULL; spin_lock(&acct_lock);
struct pid_namespace *old_ns = NULL; if (!--p->count)
kfree(p);
spin_unlock(&acct_lock);
}
if (acct->file) { static struct bsd_acct_struct *acct_get(struct bsd_acct_struct **p)
old_acct = acct->file; {
old_ns = acct->ns; struct bsd_acct_struct *res;
acct->active = 0; spin_lock(&acct_lock);
acct->file = NULL; again:
acct->ns = NULL; res = *p;
list_del(&acct->list); if (res)
res->count++;
spin_unlock(&acct_lock);
if (res) {
mutex_lock(&res->lock);
if (!res->ns) {
mutex_unlock(&res->lock);
spin_lock(&acct_lock);
if (!--res->count)
kfree(res);
goto again;
}
} }
if (file) { return res;
acct->file = file; }
acct->ns = ns;
acct->needcheck = jiffies; static void acct_kill(struct bsd_acct_struct *acct,
acct->active = 0; struct bsd_acct_struct *new)
list_add(&acct->list, &acct_list); {
} if (acct) {
if (old_acct) { struct file *file = acct->file;
mnt_unpin(old_acct->f_path.mnt); struct pid_namespace *ns = acct->ns;
spin_unlock(&acct_lock);
do_acct_process(acct, old_ns, old_acct);
filp_close(old_acct, NULL);
spin_lock(&acct_lock); spin_lock(&acct_lock);
list_del(&acct->list);
mnt_unpin(file->f_path.mnt);
spin_unlock(&acct_lock);
do_acct_process(acct);
filp_close(file, NULL);
spin_lock(&acct_lock);
ns->bacct = new;
if (new) {
mnt_pin(new->file->f_path.mnt);
list_add(&new->list, &acct_list);
}
acct->ns = NULL;
mutex_unlock(&acct->lock);
if (!(acct->count -= 2))
kfree(acct);
spin_unlock(&acct_lock);
} }
} }
@ -197,47 +213,50 @@ static int acct_on(struct filename *pathname)
{ {
struct file *file; struct file *file;
struct vfsmount *mnt; struct vfsmount *mnt;
struct pid_namespace *ns; struct pid_namespace *ns = task_active_pid_ns(current);
struct bsd_acct_struct *acct = NULL; struct bsd_acct_struct *acct, *old;
acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
if (!acct)
return -ENOMEM;
/* Difference from BSD - they don't do O_APPEND */ /* Difference from BSD - they don't do O_APPEND */
file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0); file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
if (IS_ERR(file)) if (IS_ERR(file)) {
kfree(acct);
return PTR_ERR(file); return PTR_ERR(file);
}
if (!S_ISREG(file_inode(file)->i_mode)) { if (!S_ISREG(file_inode(file)->i_mode)) {
kfree(acct);
filp_close(file, NULL); filp_close(file, NULL);
return -EACCES; return -EACCES;
} }
if (!file->f_op->write) { if (!file->f_op->write) {
kfree(acct);
filp_close(file, NULL); filp_close(file, NULL);
return -EIO; return -EIO;
} }
ns = task_active_pid_ns(current); acct->count = 1;
if (ns->bacct == NULL) { acct->file = file;
acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); acct->needcheck = jiffies;
if (acct == NULL) { acct->ns = ns;
filp_close(file, NULL); mutex_init(&acct->lock);
return -ENOMEM;
}
}
spin_lock(&acct_lock);
if (ns->bacct == NULL) {
ns->bacct = acct;
acct = NULL;
}
mnt = file->f_path.mnt; mnt = file->f_path.mnt;
mnt_pin(mnt);
acct_file_reopen(ns->bacct, file, ns);
spin_unlock(&acct_lock);
old = acct_get(&ns->bacct);
if (old) {
acct_kill(old, acct);
} else {
spin_lock(&acct_lock);
ns->bacct = acct;
mnt_pin(mnt);
list_add(&acct->list, &acct_list);
spin_unlock(&acct_lock);
}
mntput(mnt); /* it's pinned, now give up active reference */ mntput(mnt); /* it's pinned, now give up active reference */
kfree(acct);
return 0; return 0;
} }
@ -270,15 +289,7 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
mutex_unlock(&acct_on_mutex); mutex_unlock(&acct_on_mutex);
putname(tmp); putname(tmp);
} else { } else {
struct bsd_acct_struct *acct; acct_kill(acct_get(&task_active_pid_ns(current)->bacct), NULL);
acct = task_active_pid_ns(current)->bacct;
if (acct == NULL)
return 0;
spin_lock(&acct_lock);
acct_file_reopen(acct, NULL, NULL);
spin_unlock(&acct_lock);
} }
return error; return error;
@ -298,8 +309,19 @@ void acct_auto_close_mnt(struct vfsmount *m)
spin_lock(&acct_lock); spin_lock(&acct_lock);
restart: restart:
list_for_each_entry(acct, &acct_list, list) list_for_each_entry(acct, &acct_list, list)
if (acct->file && acct->file->f_path.mnt == m) { if (acct->file->f_path.mnt == m) {
acct_file_reopen(acct, NULL, NULL); acct->count++;
spin_unlock(&acct_lock);
mutex_lock(&acct->lock);
if (!acct->ns) {
mutex_unlock(&acct->lock);
spin_lock(&acct_lock);
if (!--acct->count)
kfree(acct);
goto restart;
}
acct_kill(acct, NULL);
spin_lock(&acct_lock);
goto restart; goto restart;
} }
spin_unlock(&acct_lock); spin_unlock(&acct_lock);
@ -319,8 +341,19 @@ void acct_auto_close(struct super_block *sb)
spin_lock(&acct_lock); spin_lock(&acct_lock);
restart: restart:
list_for_each_entry(acct, &acct_list, list) list_for_each_entry(acct, &acct_list, list)
if (acct->file && acct->file->f_path.dentry->d_sb == sb) { if (acct->file->f_path.dentry->d_sb == sb) {
acct_file_reopen(acct, NULL, NULL); acct->count++;
spin_unlock(&acct_lock);
mutex_lock(&acct->lock);
if (!acct->ns) {
mutex_unlock(&acct->lock);
spin_lock(&acct_lock);
if (!--acct->count)
kfree(acct);
goto restart;
}
acct_kill(acct, NULL);
spin_lock(&acct_lock);
goto restart; goto restart;
} }
spin_unlock(&acct_lock); spin_unlock(&acct_lock);
@ -328,17 +361,7 @@ void acct_auto_close(struct super_block *sb)
void acct_exit_ns(struct pid_namespace *ns) void acct_exit_ns(struct pid_namespace *ns)
{ {
struct bsd_acct_struct *acct = ns->bacct; acct_kill(acct_get(&ns->bacct), NULL);
if (acct == NULL)
return;
spin_lock(&acct_lock);
if (acct->file != NULL)
acct_file_reopen(acct, NULL, NULL);
spin_unlock(&acct_lock);
kfree(acct);
} }
/* /*
@ -507,12 +530,13 @@ static void fill_ac(acct_t *ac)
/* /*
* do_acct_process does all actual work. Caller holds the reference to file. * do_acct_process does all actual work. Caller holds the reference to file.
*/ */
static void do_acct_process(struct bsd_acct_struct *acct, static void do_acct_process(struct bsd_acct_struct *acct)
struct pid_namespace *ns, struct file *file)
{ {
acct_t ac; acct_t ac;
unsigned long flim; unsigned long flim;
const struct cred *orig_cred; const struct cred *orig_cred;
struct pid_namespace *ns = acct->ns;
struct file *file = acct->file;
/* /*
* Accounting records are not subject to resource limits. * Accounting records are not subject to resource limits.
@ -606,27 +630,12 @@ void acct_collect(long exitcode, int group_dead)
static void slow_acct_process(struct pid_namespace *ns) static void slow_acct_process(struct pid_namespace *ns)
{ {
for ( ; ns; ns = ns->parent) { for ( ; ns; ns = ns->parent) {
struct file *file = NULL; struct bsd_acct_struct *acct = acct_get(&ns->bacct);
struct bsd_acct_struct *acct; if (acct) {
do_acct_process(acct);
acct = ns->bacct; mutex_unlock(&acct->lock);
/* acct_put(acct);
* accelerate the common fastpath:
*/
if (!acct || !acct->file)
continue;
spin_lock(&acct_lock);
file = acct->file;
if (unlikely(!file)) {
spin_unlock(&acct_lock);
continue;
} }
get_file(file);
spin_unlock(&acct_lock);
do_acct_process(acct, ns, file);
fput(file);
} }
} }
@ -645,8 +654,7 @@ void acct_process(void)
* its parent. * its parent.
*/ */
for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent) { for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent) {
struct bsd_acct_struct *acct = ns->bacct; if (ns->bacct)
if (acct && acct->file)
break; break;
} }
if (unlikely(ns)) if (unlikely(ns))