[PATCH] RCU signal handling
RCU tasklist_lock and RCU signal handling: send signals RCU-read-locked instead of tasklist_lock read-locked. This is a scalability improvement on SMP and a preemption-latency improvement under PREEMPT_RCU.

Signed-off-by: Paul E. McKenney <paulmck@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: William Irwin <wli@holomorphy.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
4369ef3c3e
commit
e56d090310
8 changed files with 143 additions and 31 deletions
|
@ -760,7 +760,7 @@ static inline int de_thread(struct task_struct *tsk)
|
|||
spin_lock(&oldsighand->siglock);
|
||||
spin_lock(&newsighand->siglock);
|
||||
|
||||
current->sighand = newsighand;
|
||||
rcu_assign_pointer(current->sighand, newsighand);
|
||||
recalc_sigpending();
|
||||
|
||||
spin_unlock(&newsighand->siglock);
|
||||
|
@ -768,7 +768,7 @@ static inline int de_thread(struct task_struct *tsk)
|
|||
write_unlock_irq(&tasklist_lock);
|
||||
|
||||
if (atomic_dec_and_test(&oldsighand->count))
|
||||
kmem_cache_free(sighand_cachep, oldsighand);
|
||||
sighand_free(oldsighand);
|
||||
}
|
||||
|
||||
BUG_ON(!thread_group_leader(current));
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include <linux/percpu.h>
|
||||
#include <linux/topology.h>
|
||||
#include <linux/seccomp.h>
|
||||
#include <linux/rcupdate.h>
|
||||
|
||||
#include <linux/auxvec.h> /* For AT_VECTOR_SIZE */
|
||||
|
||||
|
@ -350,8 +351,16 @@ struct sighand_struct {
|
|||
atomic_t count;
|
||||
struct k_sigaction action[_NSIG];
|
||||
spinlock_t siglock;
|
||||
struct rcu_head rcu;
|
||||
};
|
||||
|
||||
extern void sighand_free_cb(struct rcu_head *rhp);
|
||||
|
||||
static inline void sighand_free(struct sighand_struct *sp)
|
||||
{
|
||||
call_rcu(&sp->rcu, sighand_free_cb);
|
||||
}
|
||||
|
||||
/*
|
||||
* NOTE! "signal_struct" does not have its own
|
||||
* locking, because a shared signal_struct always
|
||||
|
@ -844,6 +853,7 @@ struct task_struct {
|
|||
int cpuset_mems_generation;
|
||||
#endif
|
||||
atomic_t fs_excl; /* holding fs exclusive resources */
|
||||
struct rcu_head rcu;
|
||||
};
|
||||
|
||||
static inline pid_t process_group(struct task_struct *tsk)
|
||||
|
@ -867,8 +877,26 @@ static inline int pid_alive(struct task_struct *p)
|
|||
extern void free_task(struct task_struct *tsk);
|
||||
extern void __put_task_struct(struct task_struct *tsk);
|
||||
#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
|
||||
#define put_task_struct(tsk) \
|
||||
do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0)
|
||||
|
||||
static inline int get_task_struct_rcu(struct task_struct *t)
|
||||
{
|
||||
int oldusage;
|
||||
|
||||
do {
|
||||
oldusage = atomic_read(&t->usage);
|
||||
if (oldusage == 0)
|
||||
return 0;
|
||||
} while (cmpxchg(&t->usage.counter, oldusage, oldusage+1) != oldusage);
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern void __put_task_struct_cb(struct rcu_head *rhp);
|
||||
|
||||
static inline void put_task_struct(struct task_struct *t)
|
||||
{
|
||||
if (atomic_dec_and_test(&t->usage))
|
||||
call_rcu(&t->rcu, __put_task_struct_cb);
|
||||
}
|
||||
|
||||
/*
|
||||
* Per process flags
|
||||
|
|
|
@ -72,7 +72,6 @@ void release_task(struct task_struct * p)
|
|||
__ptrace_unlink(p);
|
||||
BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
|
||||
__exit_signal(p);
|
||||
__exit_sighand(p);
|
||||
/*
|
||||
* Note that the fastpath in sys_times depends on __exit_signal having
|
||||
* updated the counters before a task is removed from the tasklist of
|
||||
|
|
|
@ -743,6 +743,14 @@ int unshare_files(void)
|
|||
|
||||
EXPORT_SYMBOL(unshare_files);
|
||||
|
||||
void sighand_free_cb(struct rcu_head *rhp)
|
||||
{
|
||||
struct sighand_struct *sp;
|
||||
|
||||
sp = container_of(rhp, struct sighand_struct, rcu);
|
||||
kmem_cache_free(sighand_cachep, sp);
|
||||
}
|
||||
|
||||
static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
|
||||
{
|
||||
struct sighand_struct *sig;
|
||||
|
@ -752,7 +760,7 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t
|
|||
return 0;
|
||||
}
|
||||
sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
|
||||
tsk->sighand = sig;
|
||||
rcu_assign_pointer(tsk->sighand, sig);
|
||||
if (!sig)
|
||||
return -ENOMEM;
|
||||
spin_lock_init(&sig->siglock);
|
||||
|
|
22
kernel/pid.c
22
kernel/pid.c
|
@ -136,7 +136,7 @@ struct pid * fastcall find_pid(enum pid_type type, int nr)
|
|||
struct hlist_node *elem;
|
||||
struct pid *pid;
|
||||
|
||||
hlist_for_each_entry(pid, elem,
|
||||
hlist_for_each_entry_rcu(pid, elem,
|
||||
&pid_hash[type][pid_hashfn(nr)], pid_chain) {
|
||||
if (pid->nr == nr)
|
||||
return pid;
|
||||
|
@ -150,15 +150,15 @@ int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
|
|||
|
||||
task_pid = &task->pids[type];
|
||||
pid = find_pid(type, nr);
|
||||
task_pid->nr = nr;
|
||||
if (pid == NULL) {
|
||||
hlist_add_head(&task_pid->pid_chain,
|
||||
&pid_hash[type][pid_hashfn(nr)]);
|
||||
INIT_LIST_HEAD(&task_pid->pid_list);
|
||||
hlist_add_head_rcu(&task_pid->pid_chain,
|
||||
&pid_hash[type][pid_hashfn(nr)]);
|
||||
} else {
|
||||
INIT_HLIST_NODE(&task_pid->pid_chain);
|
||||
list_add_tail(&task_pid->pid_list, &pid->pid_list);
|
||||
list_add_tail_rcu(&task_pid->pid_list, &pid->pid_list);
|
||||
}
|
||||
task_pid->nr = nr;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -170,20 +170,20 @@ static fastcall int __detach_pid(task_t *task, enum pid_type type)
|
|||
|
||||
pid = &task->pids[type];
|
||||
if (!hlist_unhashed(&pid->pid_chain)) {
|
||||
hlist_del(&pid->pid_chain);
|
||||
|
||||
if (list_empty(&pid->pid_list))
|
||||
if (list_empty(&pid->pid_list)) {
|
||||
nr = pid->nr;
|
||||
else {
|
||||
hlist_del_rcu(&pid->pid_chain);
|
||||
} else {
|
||||
pid_next = list_entry(pid->pid_list.next,
|
||||
struct pid, pid_list);
|
||||
/* insert next pid from pid_list to hash */
|
||||
hlist_add_head(&pid_next->pid_chain,
|
||||
&pid_hash[type][pid_hashfn(pid_next->nr)]);
|
||||
hlist_replace_rcu(&pid->pid_chain,
|
||||
&pid_next->pid_chain);
|
||||
}
|
||||
}
|
||||
|
||||
list_del(&pid->pid_list);
|
||||
list_del_rcu(&pid->pid_list);
|
||||
pid->nr = 0;
|
||||
|
||||
return nr;
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include <linux/init.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/atomic.h>
|
||||
|
|
|
@ -176,6 +176,13 @@ static unsigned int task_timeslice(task_t *p)
|
|||
#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \
|
||||
< (long long) (sd)->cache_hot_time)
|
||||
|
||||
void __put_task_struct_cb(struct rcu_head *rhp)
|
||||
{
|
||||
__put_task_struct(container_of(rhp, struct task_struct, rcu));
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(__put_task_struct_cb);
|
||||
|
||||
/*
|
||||
* These are the runqueue data structures:
|
||||
*/
|
||||
|
|
|
@ -329,13 +329,20 @@ void __exit_sighand(struct task_struct *tsk)
|
|||
/* Ok, we're done with the signal handlers */
|
||||
tsk->sighand = NULL;
|
||||
if (atomic_dec_and_test(&sighand->count))
|
||||
kmem_cache_free(sighand_cachep, sighand);
|
||||
sighand_free(sighand);
|
||||
}
|
||||
|
||||
void exit_sighand(struct task_struct *tsk)
|
||||
{
|
||||
write_lock_irq(&tasklist_lock);
|
||||
__exit_sighand(tsk);
|
||||
rcu_read_lock();
|
||||
if (tsk->sighand != NULL) {
|
||||
struct sighand_struct *sighand = rcu_dereference(tsk->sighand);
|
||||
spin_lock(&sighand->siglock);
|
||||
__exit_sighand(tsk);
|
||||
spin_unlock(&sighand->siglock);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
write_unlock_irq(&tasklist_lock);
|
||||
}
|
||||
|
||||
|
@ -345,12 +352,14 @@ void exit_sighand(struct task_struct *tsk)
|
|||
void __exit_signal(struct task_struct *tsk)
|
||||
{
|
||||
struct signal_struct * sig = tsk->signal;
|
||||
struct sighand_struct * sighand = tsk->sighand;
|
||||
struct sighand_struct * sighand;
|
||||
|
||||
if (!sig)
|
||||
BUG();
|
||||
if (!atomic_read(&sig->count))
|
||||
BUG();
|
||||
rcu_read_lock();
|
||||
sighand = rcu_dereference(tsk->sighand);
|
||||
spin_lock(&sighand->siglock);
|
||||
posix_cpu_timers_exit(tsk);
|
||||
if (atomic_dec_and_test(&sig->count)) {
|
||||
|
@ -358,6 +367,7 @@ void __exit_signal(struct task_struct *tsk)
|
|||
if (tsk == sig->curr_target)
|
||||
sig->curr_target = next_thread(tsk);
|
||||
tsk->signal = NULL;
|
||||
__exit_sighand(tsk);
|
||||
spin_unlock(&sighand->siglock);
|
||||
flush_sigqueue(&sig->shared_pending);
|
||||
} else {
|
||||
|
@ -389,9 +399,11 @@ void __exit_signal(struct task_struct *tsk)
|
|||
sig->nvcsw += tsk->nvcsw;
|
||||
sig->nivcsw += tsk->nivcsw;
|
||||
sig->sched_time += tsk->sched_time;
|
||||
__exit_sighand(tsk);
|
||||
spin_unlock(&sighand->siglock);
|
||||
sig = NULL; /* Marker for below. */
|
||||
}
|
||||
rcu_read_unlock();
|
||||
clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
|
||||
flush_sigqueue(&tsk->pending);
|
||||
if (sig) {
|
||||
|
@ -1080,18 +1092,28 @@ void zap_other_threads(struct task_struct *p)
|
|||
}
|
||||
|
||||
/*
|
||||
* Must be called with the tasklist_lock held for reading!
|
||||
* Must be called under rcu_read_lock() or with tasklist_lock read-held.
|
||||
*/
|
||||
int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct sighand_struct *sp;
|
||||
int ret;
|
||||
|
||||
retry:
|
||||
ret = check_kill_permission(sig, info, p);
|
||||
if (!ret && sig && p->sighand) {
|
||||
spin_lock_irqsave(&p->sighand->siglock, flags);
|
||||
if (!ret && sig && (sp = p->sighand)) {
|
||||
if (!get_task_struct_rcu(p))
|
||||
return -ESRCH;
|
||||
spin_lock_irqsave(&sp->siglock, flags);
|
||||
if (p->sighand != sp) {
|
||||
spin_unlock_irqrestore(&sp->siglock, flags);
|
||||
put_task_struct(p);
|
||||
goto retry;
|
||||
}
|
||||
ret = __group_send_sig_info(sig, info, p);
|
||||
spin_unlock_irqrestore(&p->sighand->siglock, flags);
|
||||
spin_unlock_irqrestore(&sp->siglock, flags);
|
||||
put_task_struct(p);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@ -1136,14 +1158,21 @@ int
|
|||
kill_proc_info(int sig, struct siginfo *info, pid_t pid)
|
||||
{
|
||||
int error;
|
||||
int acquired_tasklist_lock = 0;
|
||||
struct task_struct *p;
|
||||
|
||||
read_lock(&tasklist_lock);
|
||||
rcu_read_lock();
|
||||
if (unlikely(sig_kernel_stop(sig) || sig == SIGCONT)) {
|
||||
read_lock(&tasklist_lock);
|
||||
acquired_tasklist_lock = 1;
|
||||
}
|
||||
p = find_task_by_pid(pid);
|
||||
error = -ESRCH;
|
||||
if (p)
|
||||
error = group_send_sig_info(sig, info, p);
|
||||
read_unlock(&tasklist_lock);
|
||||
if (unlikely(acquired_tasklist_lock))
|
||||
read_unlock(&tasklist_lock);
|
||||
rcu_read_unlock();
|
||||
return error;
|
||||
}
|
||||
|
||||
|
@ -1355,16 +1384,54 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
|
|||
{
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
struct sighand_struct *sh;
|
||||
|
||||
BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
|
||||
read_lock(&tasklist_lock);
|
||||
|
||||
/*
|
||||
* The rcu based delayed sighand destroy makes it possible to
|
||||
* run this without tasklist lock held. The task struct itself
|
||||
* cannot go away as create_timer did get_task_struct().
|
||||
*
|
||||
* We return -1 when the task is marked exiting, so
|
||||
* posix_timer_event can redirect it to the group leader.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
|
||||
if (unlikely(p->flags & PF_EXITING)) {
|
||||
ret = -1;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&p->sighand->siglock, flags);
|
||||
retry:
|
||||
sh = rcu_dereference(p->sighand);
|
||||
|
||||
spin_lock_irqsave(&sh->siglock, flags);
|
||||
if (p->sighand != sh) {
|
||||
/* We raced with exec() in a multithreaded process... */
|
||||
spin_unlock_irqrestore(&sh->siglock, flags);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/*
|
||||
* We do the check here again to handle the following scenario:
|
||||
*
|
||||
* CPU 0                 CPU 1
|
||||
* send_sigqueue
|
||||
* check PF_EXITING
|
||||
* interrupt             exit code running
|
||||
*                       __exit_signal
|
||||
*                       lock sighand->siglock
|
||||
*                       unlock sighand->siglock
|
||||
* lock sh->siglock
|
||||
* add(tsk->pending)     flush_sigqueue(tsk->pending)
|
||||
*
|
||||
*/
|
||||
|
||||
if (unlikely(p->flags & PF_EXITING)) {
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (unlikely(!list_empty(&q->list))) {
|
||||
/*
|
||||
|
@ -1388,9 +1455,9 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
|
|||
signal_wake_up(p, sig == SIGKILL);
|
||||
|
||||
out:
|
||||
spin_unlock_irqrestore(&p->sighand->siglock, flags);
|
||||
spin_unlock_irqrestore(&sh->siglock, flags);
|
||||
out_err:
|
||||
read_unlock(&tasklist_lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1402,7 +1469,9 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
|
|||
int ret = 0;
|
||||
|
||||
BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
|
||||
|
||||
read_lock(&tasklist_lock);
|
||||
/* Since it_lock is held, p->sighand cannot be NULL. */
|
||||
spin_lock_irqsave(&p->sighand->siglock, flags);
|
||||
handle_stop_signal(sig, p);
|
||||
|
||||
|
@ -1436,7 +1505,7 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
|
|||
out:
|
||||
spin_unlock_irqrestore(&p->sighand->siglock, flags);
|
||||
read_unlock(&tasklist_lock);
|
||||
return(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
Loading…
Reference in a new issue