rcu: Simplify rcu_read_unlock_special() quiescent-state accounting

The earlier approach required two scheduling-clock ticks to note a
preemptable-RCU quiescent state in the situation in which the
scheduling-clock interrupt is unlucky enough to always interrupt an
RCU read-side critical section.

With this change, the quiescent state is instead noted by the
outermost rcu_read_unlock() immediately following the first
scheduling-clock tick, or, alternatively, by the first subsequent
context switch.  Therefore, this change also speeds up grace
periods.

Suggested-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: akpm@linux-foundation.org
Cc: mathieu.desnoyers@polymtl.ca
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
LKML-Reference: <12528585111945-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Paul E. McKenney 2009-09-13 09:15:10 -07:00 committed by Ingo Molnar
parent b0e165c035
commit c3422bea5f
3 changed files with 32 additions and 38 deletions

View file

@ -1740,7 +1740,6 @@ extern cputime_t task_gtime(struct task_struct *p);
#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
#define RCU_READ_UNLOCK_GOT_QS (1 << 2) /* CPU has responded to RCU core. */
static inline void rcu_copy_process(struct task_struct *p) static inline void rcu_copy_process(struct task_struct *p)
{ {

View file

@ -107,27 +107,23 @@ static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_state *rsp,
*/ */
void rcu_sched_qs(int cpu) void rcu_sched_qs(int cpu)
{ {
unsigned long flags;
struct rcu_data *rdp; struct rcu_data *rdp;
local_irq_save(flags);
rdp = &per_cpu(rcu_sched_data, cpu); rdp = &per_cpu(rcu_sched_data, cpu);
rdp->passed_quiesc = 1;
rdp->passed_quiesc_completed = rdp->completed; rdp->passed_quiesc_completed = rdp->completed;
rcu_preempt_qs(cpu); barrier();
local_irq_restore(flags); rdp->passed_quiesc = 1;
rcu_preempt_note_context_switch(cpu);
} }
void rcu_bh_qs(int cpu) void rcu_bh_qs(int cpu)
{ {
unsigned long flags;
struct rcu_data *rdp; struct rcu_data *rdp;
local_irq_save(flags);
rdp = &per_cpu(rcu_bh_data, cpu); rdp = &per_cpu(rcu_bh_data, cpu);
rdp->passed_quiesc = 1;
rdp->passed_quiesc_completed = rdp->completed; rdp->passed_quiesc_completed = rdp->completed;
local_irq_restore(flags); barrier();
rdp->passed_quiesc = 1;
} }
#ifdef CONFIG_NO_HZ #ifdef CONFIG_NO_HZ
@ -615,6 +611,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
/* Advance to a new grace period and initialize state. */ /* Advance to a new grace period and initialize state. */
rsp->gpnum++; rsp->gpnum++;
WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT);
rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
record_gp_stall_check_time(rsp); record_gp_stall_check_time(rsp);

View file

@ -64,34 +64,42 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
* not in a quiescent state. There might be any number of tasks blocked * not in a quiescent state. There might be any number of tasks blocked
* while in an RCU read-side critical section. * while in an RCU read-side critical section.
*/ */
static void rcu_preempt_qs_record(int cpu) static void rcu_preempt_qs(int cpu)
{ {
struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
rdp->passed_quiesc = 1;
rdp->passed_quiesc_completed = rdp->completed; rdp->passed_quiesc_completed = rdp->completed;
barrier();
rdp->passed_quiesc = 1;
} }
/* /*
* We have entered the scheduler or are between softirqs in ksoftirqd. * We have entered the scheduler, and the current task might soon be
* If we are in an RCU read-side critical section, we need to reflect * context-switched away from. If this task is in an RCU read-side
* that in the state of the rcu_node structure corresponding to this CPU. * critical section, we will no longer be able to rely on the CPU to
* Caller must disable hardirqs. * record that fact, so we enqueue the task on the appropriate entry
* of the blocked_tasks[] array. The task will dequeue itself when
* it exits the outermost enclosing RCU read-side critical section.
* Therefore, the current grace period cannot be permitted to complete
* until the blocked_tasks[] entry indexed by the low-order bit of
* rnp->gpnum empties.
*
* Caller must disable preemption.
*/ */
static void rcu_preempt_qs(int cpu) static void rcu_preempt_note_context_switch(int cpu)
{ {
struct task_struct *t = current; struct task_struct *t = current;
unsigned long flags;
int phase; int phase;
struct rcu_data *rdp; struct rcu_data *rdp;
struct rcu_node *rnp; struct rcu_node *rnp;
if (t->rcu_read_lock_nesting && if (t->rcu_read_lock_nesting &&
(t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
WARN_ON_ONCE(cpu != smp_processor_id());
/* Possibly blocking in an RCU read-side critical section. */ /* Possibly blocking in an RCU read-side critical section. */
rdp = rcu_preempt_state.rda[cpu]; rdp = rcu_preempt_state.rda[cpu];
rnp = rdp->mynode; rnp = rdp->mynode;
spin_lock(&rnp->lock); spin_lock_irqsave(&rnp->lock, flags);
t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
t->rcu_blocked_node = rnp; t->rcu_blocked_node = rnp;
@ -112,7 +120,7 @@ static void rcu_preempt_qs(int cpu)
phase = !(rnp->qsmask & rdp->grpmask) ^ (rnp->gpnum & 0x1); phase = !(rnp->qsmask & rdp->grpmask) ^ (rnp->gpnum & 0x1);
list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]); list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
smp_mb(); /* Ensure later ctxt swtch seen after above. */ smp_mb(); /* Ensure later ctxt swtch seen after above. */
spin_unlock(&rnp->lock); spin_unlock_irqrestore(&rnp->lock, flags);
} }
/* /*
@ -124,9 +132,8 @@ static void rcu_preempt_qs(int cpu)
* grace period, then the fact that the task has been enqueued * grace period, then the fact that the task has been enqueued
* means that we continue to block the current grace period. * means that we continue to block the current grace period.
*/ */
rcu_preempt_qs_record(cpu); rcu_preempt_qs(cpu);
t->rcu_read_unlock_special &= ~(RCU_READ_UNLOCK_NEED_QS | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
RCU_READ_UNLOCK_GOT_QS);
} }
/* /*
@ -162,7 +169,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
special = t->rcu_read_unlock_special; special = t->rcu_read_unlock_special;
if (special & RCU_READ_UNLOCK_NEED_QS) { if (special & RCU_READ_UNLOCK_NEED_QS) {
t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
t->rcu_read_unlock_special |= RCU_READ_UNLOCK_GOT_QS; rcu_preempt_qs(smp_processor_id());
} }
/* Hardware IRQ handlers cannot block. */ /* Hardware IRQ handlers cannot block. */
@ -199,9 +206,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
*/ */
if (!empty && rnp->qsmask == 0 && if (!empty && rnp->qsmask == 0 &&
list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) { list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) {
t->rcu_read_unlock_special &= t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
~(RCU_READ_UNLOCK_NEED_QS |
RCU_READ_UNLOCK_GOT_QS);
if (rnp->parent == NULL) { if (rnp->parent == NULL) {
/* Only one rcu_node in the tree. */ /* Only one rcu_node in the tree. */
cpu_quiet_msk_finish(&rcu_preempt_state, flags); cpu_quiet_msk_finish(&rcu_preempt_state, flags);
@ -352,19 +357,12 @@ static void rcu_preempt_check_callbacks(int cpu)
struct task_struct *t = current; struct task_struct *t = current;
if (t->rcu_read_lock_nesting == 0) { if (t->rcu_read_lock_nesting == 0) {
t->rcu_read_unlock_special &= t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
~(RCU_READ_UNLOCK_NEED_QS | RCU_READ_UNLOCK_GOT_QS); rcu_preempt_qs(cpu);
rcu_preempt_qs_record(cpu);
return; return;
} }
if (per_cpu(rcu_preempt_data, cpu).qs_pending) { if (per_cpu(rcu_preempt_data, cpu).qs_pending) {
if (t->rcu_read_unlock_special & RCU_READ_UNLOCK_GOT_QS) { t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
rcu_preempt_qs_record(cpu);
t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_GOT_QS;
} else if (!(t->rcu_read_unlock_special &
RCU_READ_UNLOCK_NEED_QS)) {
t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
}
} }
} }
@ -451,7 +449,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
* Because preemptable RCU does not exist, we never have to check for * Because preemptable RCU does not exist, we never have to check for
* CPUs being in quiescent states. * CPUs being in quiescent states.
*/ */
static void rcu_preempt_qs(int cpu) static void rcu_preempt_note_context_switch(int cpu)
{ {
} }