sched: Remove rq->lock from the first half of ttwu()

Currently ttwu() acquires rq->lock twice. It first takes the lock of
the task's old rq and holds it across the p->state fiddling and the
load-balance pass; it then drops the old rq->lock to acquire the new
rq->lock.

Now that ttwu(), p->sched_class and p->cpus_allowed are all serialized
by p->pi_lock, we can drop the first rq->lock acquisition entirely.

Because p->pi_lock serializes concurrent ttwu() calls, it also protects
p->state. We set p->state to TASK_WAKING to bridge any gap between
holding p->pi_lock and taking rq->lock, and to serialize set_task_cpu()
calls against task_rq_lock().

The p->pi_lock serialization of p->sched_class allows us to call
scheduling class methods without holding the rq->lock, and the
serialization of p->cpus_allowed allows us to do the load-balancing
bits without races.

Reviewed-by: Frank Rowand <frank.rowand@am.sony.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110405152729.354401150@chello.nl
This commit is contained in:
Peter Zijlstra 2011-04-05 17:23:54 +02:00 committed by Ingo Molnar
parent 8f42ced974
commit e4a52bcb9a

View file

@ -2493,69 +2493,78 @@ ttwu_post_activation(struct task_struct *p, struct rq *rq, int wake_flags)
* Returns %true if @p was woken up, %false if it was already running * Returns %true if @p was woken up, %false if it was already running
* or @state didn't match @p's state. * or @state didn't match @p's state.
*/ */
static int try_to_wake_up(struct task_struct *p, unsigned int state, static int
int wake_flags) try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
{ {
int cpu, orig_cpu, this_cpu, success = 0; int cpu, this_cpu, success = 0;
unsigned long flags; unsigned long flags;
unsigned long en_flags = ENQUEUE_WAKEUP;
struct rq *rq; struct rq *rq;
this_cpu = get_cpu(); this_cpu = get_cpu();
smp_wmb(); smp_wmb();
raw_spin_lock_irqsave(&p->pi_lock, flags); raw_spin_lock_irqsave(&p->pi_lock, flags);
rq = __task_rq_lock(p);
if (!(p->state & state)) if (!(p->state & state))
goto out; goto out;
cpu = task_cpu(p); cpu = task_cpu(p);
if (p->on_rq) {
rq = __task_rq_lock(p);
if (p->on_rq) if (p->on_rq)
goto out_running; goto out_running;
__task_rq_unlock(rq);
}
orig_cpu = cpu;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
if (unlikely(task_running(rq, p))) while (p->on_cpu) {
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
/*
* If called from interrupt context we could have landed in the
* middle of schedule(), in this case we should take care not
* to spin on ->on_cpu if p is current, since that would
* deadlock.
*/
if (p == current)
goto out_activate; goto out_activate;
#endif
cpu_relax();
}
/*
* Pairs with the smp_wmb() in finish_lock_switch().
*/
smp_rmb();
p->sched_contributes_to_load = !!task_contributes_to_load(p); p->sched_contributes_to_load = !!task_contributes_to_load(p);
p->state = TASK_WAKING; p->state = TASK_WAKING;
if (p->sched_class->task_waking) { if (p->sched_class->task_waking)
p->sched_class->task_waking(p); p->sched_class->task_waking(p);
en_flags |= ENQUEUE_WAKING;
}
cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
if (cpu != orig_cpu) #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
set_task_cpu(p, cpu); out_activate:
__task_rq_unlock(rq); #endif
#endif /* CONFIG_SMP */
rq = cpu_rq(cpu); rq = cpu_rq(cpu);
raw_spin_lock(&rq->lock); raw_spin_lock(&rq->lock);
/* #ifdef CONFIG_SMP
* We migrated the task without holding either rq->lock, however if (cpu != task_cpu(p))
* since the task is not on the task list itself, nobody else set_task_cpu(p, cpu);
* will try and migrate the task, hence the rq should match the
* cpu we just moved it to.
*/
WARN_ON(task_cpu(p) != cpu);
WARN_ON(p->state != TASK_WAKING);
if (p->sched_contributes_to_load) if (p->sched_contributes_to_load)
rq->nr_uninterruptible--; rq->nr_uninterruptible--;
#endif
out_activate: ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
#endif /* CONFIG_SMP */
ttwu_activate(rq, p, en_flags);
out_running: out_running:
ttwu_post_activation(p, rq, wake_flags); ttwu_post_activation(p, rq, wake_flags);
ttwu_stat(rq, p, cpu, wake_flags); ttwu_stat(rq, p, cpu, wake_flags);
success = 1; success = 1;
out:
__task_rq_unlock(rq); __task_rq_unlock(rq);
out:
raw_spin_unlock_irqrestore(&p->pi_lock, flags); raw_spin_unlock_irqrestore(&p->pi_lock, flags);
put_cpu(); put_cpu();