Merge branch 'for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into core/rcu
Pull RCU updates from Paul E. McKenney: - Miscellaneous fixes, perhaps most notably removing obsolete code whose only purpose in life was to gather information for the now-removed RCU debugfs facility. Other notable changes include removing NO_HZ_FULL_ALL in favor of the nohz_full kernel boot parameter, minor optimizations for expedited grace periods, some added tracing, creating an RCU-specific workqueue using Tejun's new WQ_MEM_RECLAIM flag, and several cleanups to code and comments. - SRCU cleanups and optimizations. - Torture-test updates, perhaps most notably the adding of ARMv8 support, but also including numerous cleanups and usability fixes. Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
c4fb5f3700
25 changed files with 238 additions and 242 deletions
|
@ -131,13 +131,6 @@ error message, and the boot CPU will be removed from the mask. Note that
|
|||
this means that your system must have at least two CPUs in order for
|
||||
CONFIG_NO_HZ_FULL=y to do anything for you.
|
||||
|
||||
Alternatively, the CONFIG_NO_HZ_FULL_ALL=y Kconfig parameter specifies
|
||||
that all CPUs other than the boot CPU are adaptive-ticks CPUs. This
|
||||
Kconfig parameter will be overridden by the "nohz_full=" boot parameter,
|
||||
so that if both the CONFIG_NO_HZ_FULL_ALL=y Kconfig parameter and
|
||||
the "nohz_full=1" boot parameter are specified, the boot parameter will
|
||||
prevail so that only CPU 1 will be an adaptive-ticks CPU.
|
||||
|
||||
Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded.
|
||||
This is covered in the "RCU IMPLICATIONS" section below.
|
||||
|
||||
|
|
|
@ -214,10 +214,12 @@ do { \
|
|||
#endif
|
||||
|
||||
/*
|
||||
* init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic
|
||||
* initialization and destruction of rcu_head on the stack. rcu_head structures
|
||||
* allocated dynamically in the heap or defined statically don't need any
|
||||
* initialization.
|
||||
* The init_rcu_head_on_stack() and destroy_rcu_head_on_stack() calls
|
||||
* are needed for dynamic initialization and destruction of rcu_head
|
||||
* on the stack, and init_rcu_head()/destroy_rcu_head() are needed for
|
||||
* dynamic initialization and destruction of statically allocated rcu_head
|
||||
* structures. However, rcu_head structures allocated dynamically in the
|
||||
* heap don't need any initialization.
|
||||
*/
|
||||
#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
|
||||
void init_rcu_head(struct rcu_head *head);
|
||||
|
|
|
@ -217,7 +217,7 @@ struct ustat {
|
|||
*
|
||||
* This guarantee is important for a few reasons:
|
||||
* - future call_rcu_lazy() will make use of lower bits in the pointer;
|
||||
* - the structure shares storage spacer in struct page with @compound_head,
|
||||
* - the structure shares storage space in struct page with @compound_head,
|
||||
* which encodes PageTail() in bit 0. The guarantee is needed to avoid
|
||||
* false-positive PageTail().
|
||||
*/
|
||||
|
|
|
@ -179,6 +179,10 @@ TRACE_EVENT(rcu_grace_period_init,
|
|||
*
|
||||
* "snap": Captured snapshot of expedited grace period sequence number.
|
||||
* "start": Started a real expedited grace period.
|
||||
* "reset": Started resetting the tree.
|
||||
* "select": Started selecting the CPUs to wait on.
|
||||
* "selectofl": Selected CPU partially offline.
|
||||
* "startwait": Started waiting on selected CPUs.
|
||||
* "end": Ended a real expedited grace period.
|
||||
* "endwake": Woke piggybackers up.
|
||||
* "done": Someone else did the expedited grace period for us.
|
||||
|
|
|
@ -77,12 +77,18 @@ static inline void rcu_seq_start(unsigned long *sp)
|
|||
WARN_ON_ONCE(rcu_seq_state(*sp) != 1);
|
||||
}
|
||||
|
||||
/* Compute the end-of-grace-period value for the specified sequence number. */
|
||||
static inline unsigned long rcu_seq_endval(unsigned long *sp)
|
||||
{
|
||||
return (*sp | RCU_SEQ_STATE_MASK) + 1;
|
||||
}
|
||||
|
||||
/* Adjust sequence number for end of update-side operation. */
|
||||
static inline void rcu_seq_end(unsigned long *sp)
|
||||
{
|
||||
smp_mb(); /* Ensure update-side operation before counter increment. */
|
||||
WARN_ON_ONCE(!rcu_seq_state(*sp));
|
||||
WRITE_ONCE(*sp, (*sp | RCU_SEQ_STATE_MASK) + 1);
|
||||
WRITE_ONCE(*sp, rcu_seq_endval(sp));
|
||||
}
|
||||
|
||||
/* Take a snapshot of the update side's sequence number. */
|
||||
|
@ -295,9 +301,19 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
|
|||
* Iterate over all possible CPUs in a leaf RCU node.
|
||||
*/
|
||||
#define for_each_leaf_node_possible_cpu(rnp, cpu) \
|
||||
for ((cpu) = cpumask_next(rnp->grplo - 1, cpu_possible_mask); \
|
||||
cpu <= rnp->grphi; \
|
||||
cpu = cpumask_next((cpu), cpu_possible_mask))
|
||||
for ((cpu) = cpumask_next((rnp)->grplo - 1, cpu_possible_mask); \
|
||||
(cpu) <= rnp->grphi; \
|
||||
(cpu) = cpumask_next((cpu), cpu_possible_mask))
|
||||
|
||||
/*
|
||||
* Iterate over all CPUs in a leaf RCU node's specified mask.
|
||||
*/
|
||||
#define rcu_find_next_bit(rnp, cpu, mask) \
|
||||
((rnp)->grplo + find_next_bit(&(mask), BITS_PER_LONG, (cpu)))
|
||||
#define for_each_leaf_node_cpu_mask(rnp, cpu, mask) \
|
||||
for ((cpu) = rcu_find_next_bit((rnp), 0, (mask)); \
|
||||
(cpu) <= rnp->grphi; \
|
||||
(cpu) = rcu_find_next_bit((rnp), (cpu) + 1 - (rnp->grplo), (mask)))
|
||||
|
||||
/*
|
||||
* Wrappers for the rcu_node::lock acquire and release.
|
||||
|
@ -337,7 +353,7 @@ do { \
|
|||
} while (0)
|
||||
|
||||
#define raw_spin_unlock_irqrestore_rcu_node(p, flags) \
|
||||
raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \
|
||||
raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)
|
||||
|
||||
#define raw_spin_trylock_rcu_node(p) \
|
||||
({ \
|
||||
|
@ -348,6 +364,9 @@ do { \
|
|||
___locked; \
|
||||
})
|
||||
|
||||
#define raw_lockdep_assert_held_rcu_node(p) \
|
||||
lockdep_assert_held(&ACCESS_PRIVATE(p, lock))
|
||||
|
||||
#endif /* #if defined(SRCU) || !defined(TINY_RCU) */
|
||||
|
||||
#ifdef CONFIG_TINY_RCU
|
||||
|
@ -356,24 +375,20 @@ static inline bool rcu_gp_is_normal(void) { return true; }
|
|||
static inline bool rcu_gp_is_expedited(void) { return false; }
|
||||
static inline void rcu_expedite_gp(void) { }
|
||||
static inline void rcu_unexpedite_gp(void) { }
|
||||
static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
|
||||
#else /* #ifdef CONFIG_TINY_RCU */
|
||||
bool rcu_gp_is_normal(void); /* Internal RCU use. */
|
||||
bool rcu_gp_is_expedited(void); /* Internal RCU use. */
|
||||
void rcu_expedite_gp(void);
|
||||
void rcu_unexpedite_gp(void);
|
||||
void rcupdate_announce_bootup_oddness(void);
|
||||
void rcu_request_urgent_qs_task(struct task_struct *t);
|
||||
#endif /* #else #ifdef CONFIG_TINY_RCU */
|
||||
|
||||
#define RCU_SCHEDULER_INACTIVE 0
|
||||
#define RCU_SCHEDULER_INIT 1
|
||||
#define RCU_SCHEDULER_RUNNING 2
|
||||
|
||||
#ifdef CONFIG_TINY_RCU
|
||||
static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
|
||||
#else /* #ifdef CONFIG_TINY_RCU */
|
||||
void rcu_request_urgent_qs_task(struct task_struct *t);
|
||||
#endif /* #else #ifdef CONFIG_TINY_RCU */
|
||||
|
||||
enum rcutorture_type {
|
||||
RCU_FLAVOR,
|
||||
RCU_BH_FLAVOR,
|
||||
|
@ -470,6 +485,7 @@ void show_rcu_gp_kthreads(void);
|
|||
void rcu_force_quiescent_state(void);
|
||||
void rcu_bh_force_quiescent_state(void);
|
||||
void rcu_sched_force_quiescent_state(void);
|
||||
extern struct workqueue_struct *rcu_gp_wq;
|
||||
#endif /* #else #ifdef CONFIG_TINY_RCU */
|
||||
|
||||
#ifdef CONFIG_RCU_NOCB_CPU
|
||||
|
|
|
@ -61,11 +61,30 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
|
|||
#define VERBOSE_PERFOUT_ERRSTRING(s) \
|
||||
do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0)
|
||||
|
||||
/*
|
||||
* The intended use cases for the nreaders and nwriters module parameters
|
||||
* are as follows:
|
||||
*
|
||||
* 1. Specify only the nr_cpus kernel boot parameter. This will
|
||||
* set both nreaders and nwriters to the value specified by
|
||||
* nr_cpus for a mixed reader/writer test.
|
||||
*
|
||||
* 2. Specify the nr_cpus kernel boot parameter, but set
|
||||
* rcuperf.nreaders to zero. This will set nwriters to the
|
||||
* value specified by nr_cpus for an update-only test.
|
||||
*
|
||||
* 3. Specify the nr_cpus kernel boot parameter, but set
|
||||
* rcuperf.nwriters to zero. This will set nreaders to the
|
||||
* value specified by nr_cpus for a read-only test.
|
||||
*
|
||||
* Various other use cases may of course be specified.
|
||||
*/
|
||||
|
||||
torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives");
|
||||
torture_param(int, gp_async_max, 1000, "Max # outstanding waits per reader");
|
||||
torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
|
||||
torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
|
||||
torture_param(int, nreaders, 0, "Number of RCU reader threads");
|
||||
torture_param(int, nreaders, -1, "Number of RCU reader threads");
|
||||
torture_param(int, nwriters, -1, "Number of RCU updater threads");
|
||||
torture_param(bool, shutdown, !IS_ENABLED(MODULE),
|
||||
"Shutdown at end of performance tests.");
|
||||
|
|
|
@ -909,34 +909,38 @@ rcu_torture_writer(void *arg)
|
|||
int nsynctypes = 0;
|
||||
|
||||
VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
|
||||
if (!can_expedite) {
|
||||
if (!can_expedite)
|
||||
pr_alert("%s" TORTURE_FLAG
|
||||
" GP expediting controlled from boot/sysfs for %s,\n",
|
||||
" GP expediting controlled from boot/sysfs for %s.\n",
|
||||
torture_type, cur_ops->name);
|
||||
pr_alert("%s" TORTURE_FLAG
|
||||
" Disabled dynamic grace-period expediting.\n",
|
||||
torture_type);
|
||||
}
|
||||
|
||||
/* Initialize synctype[] array. If none set, take default. */
|
||||
if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1)
|
||||
gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true;
|
||||
if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync)
|
||||
if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync) {
|
||||
synctype[nsynctypes++] = RTWS_COND_GET;
|
||||
else if (gp_cond && (!cur_ops->get_state || !cur_ops->cond_sync))
|
||||
pr_alert("rcu_torture_writer: gp_cond without primitives.\n");
|
||||
if (gp_exp1 && cur_ops->exp_sync)
|
||||
pr_info("%s: Testing conditional GPs.\n", __func__);
|
||||
} else if (gp_cond && (!cur_ops->get_state || !cur_ops->cond_sync)) {
|
||||
pr_alert("%s: gp_cond without primitives.\n", __func__);
|
||||
}
|
||||
if (gp_exp1 && cur_ops->exp_sync) {
|
||||
synctype[nsynctypes++] = RTWS_EXP_SYNC;
|
||||
else if (gp_exp && !cur_ops->exp_sync)
|
||||
pr_alert("rcu_torture_writer: gp_exp without primitives.\n");
|
||||
if (gp_normal1 && cur_ops->deferred_free)
|
||||
pr_info("%s: Testing expedited GPs.\n", __func__);
|
||||
} else if (gp_exp && !cur_ops->exp_sync) {
|
||||
pr_alert("%s: gp_exp without primitives.\n", __func__);
|
||||
}
|
||||
if (gp_normal1 && cur_ops->deferred_free) {
|
||||
synctype[nsynctypes++] = RTWS_DEF_FREE;
|
||||
else if (gp_normal && !cur_ops->deferred_free)
|
||||
pr_alert("rcu_torture_writer: gp_normal without primitives.\n");
|
||||
if (gp_sync1 && cur_ops->sync)
|
||||
pr_info("%s: Testing asynchronous GPs.\n", __func__);
|
||||
} else if (gp_normal && !cur_ops->deferred_free) {
|
||||
pr_alert("%s: gp_normal without primitives.\n", __func__);
|
||||
}
|
||||
if (gp_sync1 && cur_ops->sync) {
|
||||
synctype[nsynctypes++] = RTWS_SYNC;
|
||||
else if (gp_sync && !cur_ops->sync)
|
||||
pr_alert("rcu_torture_writer: gp_sync without primitives.\n");
|
||||
pr_info("%s: Testing normal GPs.\n", __func__);
|
||||
} else if (gp_sync && !cur_ops->sync) {
|
||||
pr_alert("%s: gp_sync without primitives.\n", __func__);
|
||||
}
|
||||
if (WARN_ONCE(nsynctypes == 0,
|
||||
"rcu_torture_writer: No update-side primitives.\n")) {
|
||||
/*
|
||||
|
@ -1011,6 +1015,9 @@ rcu_torture_writer(void *arg)
|
|||
rcu_unexpedite_gp();
|
||||
if (++expediting > 3)
|
||||
expediting = -expediting;
|
||||
} else if (!can_expedite) { /* Disabled during boot, recheck. */
|
||||
can_expedite = !rcu_gp_is_expedited() &&
|
||||
!rcu_gp_is_normal();
|
||||
}
|
||||
rcu_torture_writer_state = RTWS_STUTTER;
|
||||
stutter_wait("rcu_torture_writer");
|
||||
|
@ -1021,6 +1028,10 @@ rcu_torture_writer(void *arg)
|
|||
while (can_expedite && expediting++ < 0)
|
||||
rcu_unexpedite_gp();
|
||||
WARN_ON_ONCE(can_expedite && rcu_gp_is_expedited());
|
||||
if (!can_expedite)
|
||||
pr_alert("%s" TORTURE_FLAG
|
||||
" Dynamic grace-period expediting was disabled.\n",
|
||||
torture_type);
|
||||
rcu_torture_writer_state = RTWS_STOPPING;
|
||||
torture_kthread_stopping("rcu_torture_writer");
|
||||
return 0;
|
||||
|
@ -1045,13 +1056,13 @@ rcu_torture_fakewriter(void *arg)
|
|||
torture_random(&rand) % (nfakewriters * 8) == 0) {
|
||||
cur_ops->cb_barrier();
|
||||
} else if (gp_normal == gp_exp) {
|
||||
if (torture_random(&rand) & 0x80)
|
||||
if (cur_ops->sync && torture_random(&rand) & 0x80)
|
||||
cur_ops->sync();
|
||||
else
|
||||
else if (cur_ops->exp_sync)
|
||||
cur_ops->exp_sync();
|
||||
} else if (gp_normal) {
|
||||
} else if (gp_normal && cur_ops->sync) {
|
||||
cur_ops->sync();
|
||||
} else {
|
||||
} else if (cur_ops->exp_sync) {
|
||||
cur_ops->exp_sync();
|
||||
}
|
||||
stutter_wait("rcu_torture_fakewriter");
|
||||
|
@ -1557,11 +1568,10 @@ static int rcu_torture_barrier_init(void)
|
|||
atomic_set(&barrier_cbs_count, 0);
|
||||
atomic_set(&barrier_cbs_invoked, 0);
|
||||
barrier_cbs_tasks =
|
||||
kzalloc(n_barrier_cbs * sizeof(barrier_cbs_tasks[0]),
|
||||
kcalloc(n_barrier_cbs, sizeof(barrier_cbs_tasks[0]),
|
||||
GFP_KERNEL);
|
||||
barrier_cbs_wq =
|
||||
kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]),
|
||||
GFP_KERNEL);
|
||||
kcalloc(n_barrier_cbs, sizeof(barrier_cbs_wq[0]), GFP_KERNEL);
|
||||
if (barrier_cbs_tasks == NULL || !barrier_cbs_wq)
|
||||
return -ENOMEM;
|
||||
for (i = 0; i < n_barrier_cbs; i++) {
|
||||
|
@ -1674,7 +1684,7 @@ static void rcu_torture_err_cb(struct rcu_head *rhp)
|
|||
* next grace period. Unlikely, but can happen. If it
|
||||
* does happen, the debug-objects subsystem won't have splatted.
|
||||
*/
|
||||
pr_alert("rcutorture: duplicated callback was invoked.\n");
|
||||
pr_alert("%s: duplicated callback was invoked.\n", KBUILD_MODNAME);
|
||||
}
|
||||
#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
|
||||
|
||||
|
@ -1691,7 +1701,7 @@ static void rcu_test_debug_objects(void)
|
|||
|
||||
init_rcu_head_on_stack(&rh1);
|
||||
init_rcu_head_on_stack(&rh2);
|
||||
pr_alert("rcutorture: WARN: Duplicate call_rcu() test starting.\n");
|
||||
pr_alert("%s: WARN: Duplicate call_rcu() test starting.\n", KBUILD_MODNAME);
|
||||
|
||||
/* Try to queue the rh2 pair of callbacks for the same grace period. */
|
||||
preempt_disable(); /* Prevent preemption from interrupting test. */
|
||||
|
@ -1706,11 +1716,11 @@ static void rcu_test_debug_objects(void)
|
|||
|
||||
/* Wait for them all to get done so we can safely return. */
|
||||
rcu_barrier();
|
||||
pr_alert("rcutorture: WARN: Duplicate call_rcu() test complete.\n");
|
||||
pr_alert("%s: WARN: Duplicate call_rcu() test complete.\n", KBUILD_MODNAME);
|
||||
destroy_rcu_head_on_stack(&rh1);
|
||||
destroy_rcu_head_on_stack(&rh2);
|
||||
#else /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
|
||||
pr_alert("rcutorture: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n");
|
||||
pr_alert("%s: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n", KBUILD_MODNAME);
|
||||
#endif /* #else #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
|
||||
}
|
||||
|
||||
|
@ -1799,7 +1809,7 @@ rcu_torture_init(void)
|
|||
if (firsterr)
|
||||
goto unwind;
|
||||
if (nfakewriters > 0) {
|
||||
fakewriter_tasks = kzalloc(nfakewriters *
|
||||
fakewriter_tasks = kcalloc(nfakewriters,
|
||||
sizeof(fakewriter_tasks[0]),
|
||||
GFP_KERNEL);
|
||||
if (fakewriter_tasks == NULL) {
|
||||
|
@ -1814,7 +1824,7 @@ rcu_torture_init(void)
|
|||
if (firsterr)
|
||||
goto unwind;
|
||||
}
|
||||
reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]),
|
||||
reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]),
|
||||
GFP_KERNEL);
|
||||
if (reader_tasks == NULL) {
|
||||
VERBOSE_TOROUT_ERRSTRING("out of memory");
|
||||
|
|
|
@ -386,7 +386,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
|
|||
flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
|
||||
if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
|
||||
WARN_ON(srcu_readers_active(sp))) {
|
||||
pr_info("cleanup_srcu_struct: Active srcu_struct %p state: %d\n", sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
|
||||
pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
|
||||
return; /* Caller forgot to stop doing call_srcu()? */
|
||||
}
|
||||
free_percpu(sp->sda);
|
||||
|
@ -439,7 +439,7 @@ static void srcu_gp_start(struct srcu_struct *sp)
|
|||
struct srcu_data *sdp = this_cpu_ptr(sp->sda);
|
||||
int state;
|
||||
|
||||
lockdep_assert_held(&sp->lock);
|
||||
lockdep_assert_held(&ACCESS_PRIVATE(sp, lock));
|
||||
WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
|
||||
rcu_segcblist_advance(&sdp->srcu_cblist,
|
||||
rcu_seq_current(&sp->srcu_gp_seq));
|
||||
|
@ -492,8 +492,7 @@ static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
|
|||
*/
|
||||
static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
|
||||
{
|
||||
srcu_queue_delayed_work_on(sdp->cpu, system_power_efficient_wq,
|
||||
&sdp->work, delay);
|
||||
srcu_queue_delayed_work_on(sdp->cpu, rcu_gp_wq, &sdp->work, delay);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -527,11 +526,11 @@ static void srcu_gp_end(struct srcu_struct *sp)
|
|||
{
|
||||
unsigned long cbdelay;
|
||||
bool cbs;
|
||||
bool last_lvl;
|
||||
int cpu;
|
||||
unsigned long flags;
|
||||
unsigned long gpseq;
|
||||
int idx;
|
||||
int idxnext;
|
||||
unsigned long mask;
|
||||
struct srcu_data *sdp;
|
||||
struct srcu_node *snp;
|
||||
|
@ -555,11 +554,11 @@ static void srcu_gp_end(struct srcu_struct *sp)
|
|||
|
||||
/* Initiate callback invocation as needed. */
|
||||
idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
|
||||
idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs);
|
||||
rcu_for_each_node_breadth_first(sp, snp) {
|
||||
spin_lock_irq_rcu_node(snp);
|
||||
cbs = false;
|
||||
if (snp >= sp->level[rcu_num_lvls - 1])
|
||||
last_lvl = snp >= sp->level[rcu_num_lvls - 1];
|
||||
if (last_lvl)
|
||||
cbs = snp->srcu_have_cbs[idx] == gpseq;
|
||||
snp->srcu_have_cbs[idx] = gpseq;
|
||||
rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
|
||||
|
@ -572,13 +571,16 @@ static void srcu_gp_end(struct srcu_struct *sp)
|
|||
srcu_schedule_cbs_snp(sp, snp, mask, cbdelay);
|
||||
|
||||
/* Occasionally prevent srcu_data counter wrap. */
|
||||
if (!(gpseq & counter_wrap_check))
|
||||
if (!(gpseq & counter_wrap_check) && last_lvl)
|
||||
for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
|
||||
sdp = per_cpu_ptr(sp->sda, cpu);
|
||||
spin_lock_irqsave_rcu_node(sdp, flags);
|
||||
if (ULONG_CMP_GE(gpseq,
|
||||
sdp->srcu_gp_seq_needed + 100))
|
||||
sdp->srcu_gp_seq_needed = gpseq;
|
||||
if (ULONG_CMP_GE(gpseq,
|
||||
sdp->srcu_gp_seq_needed_exp + 100))
|
||||
sdp->srcu_gp_seq_needed_exp = gpseq;
|
||||
spin_unlock_irqrestore_rcu_node(sdp, flags);
|
||||
}
|
||||
}
|
||||
|
@ -593,9 +595,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
|
|||
ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) {
|
||||
srcu_gp_start(sp);
|
||||
spin_unlock_irq_rcu_node(sp);
|
||||
/* Throttle expedited grace periods: Should be rare! */
|
||||
srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff
|
||||
? 0 : SRCU_INTERVAL);
|
||||
srcu_reschedule(sp, 0);
|
||||
} else {
|
||||
spin_unlock_irq_rcu_node(sp);
|
||||
}
|
||||
|
@ -626,7 +626,7 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp,
|
|||
spin_unlock_irqrestore_rcu_node(snp, flags);
|
||||
}
|
||||
spin_lock_irqsave_rcu_node(sp, flags);
|
||||
if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
|
||||
if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
|
||||
sp->srcu_gp_seq_needed_exp = s;
|
||||
spin_unlock_irqrestore_rcu_node(sp, flags);
|
||||
}
|
||||
|
@ -691,8 +691,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
|
|||
rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) {
|
||||
WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
|
||||
srcu_gp_start(sp);
|
||||
queue_delayed_work(system_power_efficient_wq, &sp->work,
|
||||
srcu_get_delay(sp));
|
||||
queue_delayed_work(rcu_gp_wq, &sp->work, srcu_get_delay(sp));
|
||||
}
|
||||
spin_unlock_irqrestore_rcu_node(sp, flags);
|
||||
}
|
||||
|
@ -1225,7 +1224,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
|
|||
spin_unlock_irq_rcu_node(sp);
|
||||
|
||||
if (pushgp)
|
||||
queue_delayed_work(system_power_efficient_wq, &sp->work, delay);
|
||||
queue_delayed_work(rcu_gp_wq, &sp->work, delay);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -1161,7 +1161,7 @@ static int rcu_is_cpu_rrupt_from_idle(void)
|
|||
*/
|
||||
static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
|
||||
{
|
||||
lockdep_assert_held(&rnp->lock);
|
||||
raw_lockdep_assert_held_rcu_node(rnp);
|
||||
if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4, rnp->gpnum))
|
||||
WRITE_ONCE(rdp->gpwrap, true);
|
||||
if (ULONG_CMP_LT(rdp->rcu_iw_gpnum + ULONG_MAX / 4, rnp->gpnum))
|
||||
|
@ -1350,6 +1350,7 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
|
|||
rsp->gp_kthread ? rsp->gp_kthread->state : ~0,
|
||||
rsp->gp_kthread ? task_cpu(rsp->gp_kthread) : -1);
|
||||
if (rsp->gp_kthread) {
|
||||
pr_err("RCU grace-period kthread stack dump:\n");
|
||||
sched_show_task(rsp->gp_kthread);
|
||||
wake_up_process(rsp->gp_kthread);
|
||||
}
|
||||
|
@ -1628,7 +1629,7 @@ void rcu_cpu_stall_reset(void)
|
|||
static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
|
||||
struct rcu_node *rnp)
|
||||
{
|
||||
lockdep_assert_held(&rnp->lock);
|
||||
raw_lockdep_assert_held_rcu_node(rnp);
|
||||
|
||||
/*
|
||||
* If RCU is idle, we just wait for the next grace period.
|
||||
|
@ -1675,7 +1676,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
|
|||
bool ret = false;
|
||||
struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
|
||||
|
||||
lockdep_assert_held(&rnp->lock);
|
||||
raw_lockdep_assert_held_rcu_node(rnp);
|
||||
|
||||
/*
|
||||
* Pick up grace-period number for new callbacks. If this
|
||||
|
@ -1803,7 +1804,7 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
|
|||
{
|
||||
bool ret = false;
|
||||
|
||||
lockdep_assert_held(&rnp->lock);
|
||||
raw_lockdep_assert_held_rcu_node(rnp);
|
||||
|
||||
/* If no pending (not yet ready to invoke) callbacks, nothing to do. */
|
||||
if (!rcu_segcblist_pend_cbs(&rdp->cblist))
|
||||
|
@ -1843,7 +1844,7 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
|
|||
static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
lockdep_assert_held(&rnp->lock);
|
||||
raw_lockdep_assert_held_rcu_node(rnp);
|
||||
|
||||
/* If no pending (not yet ready to invoke) callbacks, nothing to do. */
|
||||
if (!rcu_segcblist_pend_cbs(&rdp->cblist))
|
||||
|
@ -1871,7 +1872,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
|
|||
bool ret;
|
||||
bool need_gp;
|
||||
|
||||
lockdep_assert_held(&rnp->lock);
|
||||
raw_lockdep_assert_held_rcu_node(rnp);
|
||||
|
||||
/* Handle the ends of any preceding grace periods first. */
|
||||
if (rdp->completed == rnp->completed &&
|
||||
|
@ -2296,7 +2297,7 @@ static bool
|
|||
rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
lockdep_assert_held(&rnp->lock);
|
||||
raw_lockdep_assert_held_rcu_node(rnp);
|
||||
if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
|
||||
/*
|
||||
* Either we have not yet spawned the grace-period
|
||||
|
@ -2358,7 +2359,7 @@ static bool rcu_start_gp(struct rcu_state *rsp)
|
|||
static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
|
||||
__releases(rcu_get_root(rsp)->lock)
|
||||
{
|
||||
lockdep_assert_held(&rcu_get_root(rsp)->lock);
|
||||
raw_lockdep_assert_held_rcu_node(rcu_get_root(rsp));
|
||||
WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
|
||||
WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
|
||||
|
@ -2383,7 +2384,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
|
|||
unsigned long oldmask = 0;
|
||||
struct rcu_node *rnp_c;
|
||||
|
||||
lockdep_assert_held(&rnp->lock);
|
||||
raw_lockdep_assert_held_rcu_node(rnp);
|
||||
|
||||
/* Walk up the rcu_node hierarchy. */
|
||||
for (;;) {
|
||||
|
@ -2447,7 +2448,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
|
|||
unsigned long mask;
|
||||
struct rcu_node *rnp_p;
|
||||
|
||||
lockdep_assert_held(&rnp->lock);
|
||||
raw_lockdep_assert_held_rcu_node(rnp);
|
||||
if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p ||
|
||||
rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
|
@ -2592,7 +2593,7 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
|
|||
long mask;
|
||||
struct rcu_node *rnp = rnp_leaf;
|
||||
|
||||
lockdep_assert_held(&rnp->lock);
|
||||
raw_lockdep_assert_held_rcu_node(rnp);
|
||||
if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
|
||||
rnp->qsmaskinit || rcu_preempt_has_tasks(rnp))
|
||||
return;
|
||||
|
@ -2691,7 +2692,6 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
|
|||
/* Update counts and requeue any remaining callbacks. */
|
||||
rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl);
|
||||
smp_mb(); /* List handling before counting for rcu_barrier(). */
|
||||
rdp->n_cbs_invoked += count;
|
||||
rcu_segcblist_insert_count(&rdp->cblist, &rcl);
|
||||
|
||||
/* Reinstate batch limit if we have worked down the excess. */
|
||||
|
@ -2845,10 +2845,8 @@ static void force_quiescent_state(struct rcu_state *rsp)
|
|||
!raw_spin_trylock(&rnp->fqslock);
|
||||
if (rnp_old != NULL)
|
||||
raw_spin_unlock(&rnp_old->fqslock);
|
||||
if (ret) {
|
||||
rsp->n_force_qs_lh++;
|
||||
if (ret)
|
||||
return;
|
||||
}
|
||||
rnp_old = rnp;
|
||||
}
|
||||
/* rnp_old == rcu_get_root(rsp), rnp == NULL. */
|
||||
|
@ -2857,7 +2855,6 @@ static void force_quiescent_state(struct rcu_state *rsp)
|
|||
raw_spin_lock_irqsave_rcu_node(rnp_old, flags);
|
||||
raw_spin_unlock(&rnp_old->fqslock);
|
||||
if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
|
||||
rsp->n_force_qs_lh++;
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
|
||||
return; /* Someone beat us to it. */
|
||||
}
|
||||
|
@ -3355,8 +3352,6 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
|
|||
{
|
||||
struct rcu_node *rnp = rdp->mynode;
|
||||
|
||||
rdp->n_rcu_pending++;
|
||||
|
||||
/* Check for CPU stalls, if enabled. */
|
||||
check_cpu_stall(rsp, rdp);
|
||||
|
||||
|
@ -3365,48 +3360,31 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
|
|||
return 0;
|
||||
|
||||
/* Is the RCU core waiting for a quiescent state from this CPU? */
|
||||
if (rcu_scheduler_fully_active &&
|
||||
rdp->core_needs_qs && rdp->cpu_no_qs.b.norm &&
|
||||
rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_dynticks.rcu_qs_ctr)) {
|
||||
rdp->n_rp_core_needs_qs++;
|
||||
} else if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm) {
|
||||
rdp->n_rp_report_qs++;
|
||||
if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Does this CPU have callbacks ready to invoke? */
|
||||
if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
|
||||
rdp->n_rp_cb_ready++;
|
||||
if (rcu_segcblist_ready_cbs(&rdp->cblist))
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Has RCU gone idle with this CPU needing another grace period? */
|
||||
if (cpu_needs_another_gp(rsp, rdp)) {
|
||||
rdp->n_rp_cpu_needs_gp++;
|
||||
if (cpu_needs_another_gp(rsp, rdp))
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Has another RCU grace period completed? */
|
||||
if (READ_ONCE(rnp->completed) != rdp->completed) { /* outside lock */
|
||||
rdp->n_rp_gp_completed++;
|
||||
if (READ_ONCE(rnp->completed) != rdp->completed) /* outside lock */
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Has a new RCU grace period started? */
|
||||
if (READ_ONCE(rnp->gpnum) != rdp->gpnum ||
|
||||
unlikely(READ_ONCE(rdp->gpwrap))) { /* outside lock */
|
||||
rdp->n_rp_gp_started++;
|
||||
unlikely(READ_ONCE(rdp->gpwrap))) /* outside lock */
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Does this CPU need a deferred NOCB wakeup? */
|
||||
if (rcu_nocb_need_deferred_wakeup(rdp)) {
|
||||
rdp->n_rp_nocb_defer_wakeup++;
|
||||
if (rcu_nocb_need_deferred_wakeup(rdp))
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* nothing to do */
|
||||
rdp->n_rp_need_nothing++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -3618,7 +3596,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
|
|||
long mask;
|
||||
struct rcu_node *rnp = rnp_leaf;
|
||||
|
||||
lockdep_assert_held(&rnp->lock);
|
||||
raw_lockdep_assert_held_rcu_node(rnp);
|
||||
for (;;) {
|
||||
mask = rnp->grpmask;
|
||||
rnp = rnp->parent;
|
||||
|
@ -3636,12 +3614,9 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
|
|||
static void __init
|
||||
rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
struct rcu_node *rnp = rcu_get_root(rsp);
|
||||
|
||||
/* Set up local state, ensuring consistent view of global state. */
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
|
||||
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
|
||||
WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != 1);
|
||||
|
@ -3649,7 +3624,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
|
|||
rdp->cpu = cpu;
|
||||
rdp->rsp = rsp;
|
||||
rcu_boot_init_nocb_percpu_data(rdp);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -4193,6 +4167,8 @@ static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp)
|
|||
pr_cont("\n");
|
||||
}
|
||||
|
||||
struct workqueue_struct *rcu_gp_wq;
|
||||
|
||||
void __init rcu_init(void)
|
||||
{
|
||||
int cpu;
|
||||
|
@ -4219,6 +4195,10 @@ void __init rcu_init(void)
|
|||
rcu_cpu_starting(cpu);
|
||||
rcutree_online_cpu(cpu);
|
||||
}
|
||||
|
||||
/* Create workqueue for expedited GPs and for Tree SRCU. */
|
||||
rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
|
||||
WARN_ON(!rcu_gp_wq);
|
||||
}
|
||||
|
||||
#include "tree_exp.h"
|
||||
|
|
|
@ -146,12 +146,6 @@ struct rcu_node {
|
|||
/* boosting for this rcu_node structure. */
|
||||
unsigned int boost_kthread_status;
|
||||
/* State of boost_kthread_task for tracing. */
|
||||
unsigned long n_tasks_boosted;
|
||||
/* Total number of tasks boosted. */
|
||||
unsigned long n_exp_boosts;
|
||||
/* Number of tasks boosted for expedited GP. */
|
||||
unsigned long n_normal_boosts;
|
||||
/* Number of tasks boosted for normal GP. */
|
||||
#ifdef CONFIG_RCU_NOCB_CPU
|
||||
struct swait_queue_head nocb_gp_wq[2];
|
||||
/* Place for rcu_nocb_kthread() to wait GP. */
|
||||
|
@ -184,13 +178,6 @@ union rcu_noqs {
|
|||
u16 s; /* Set of bits, aggregate OR here. */
|
||||
};
|
||||
|
||||
/* Index values for nxttail array in struct rcu_data. */
|
||||
#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */
|
||||
#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */
|
||||
#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */
|
||||
#define RCU_NEXT_TAIL 3
|
||||
#define RCU_NEXT_SIZE 4
|
||||
|
||||
/* Per-CPU data for read-copy update. */
|
||||
struct rcu_data {
|
||||
/* 1) quiescent-state and grace-period handling : */
|
||||
|
@ -217,8 +204,6 @@ struct rcu_data {
|
|||
/* different grace periods. */
|
||||
long qlen_last_fqs_check;
|
||||
/* qlen at last check for QS forcing */
|
||||
unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */
|
||||
unsigned long n_nocbs_invoked; /* count of no-CBs RCU cbs invoked. */
|
||||
unsigned long n_force_qs_snap;
|
||||
/* did other CPU force QS recently? */
|
||||
long blimit; /* Upper limit on a processed batch */
|
||||
|
@ -234,18 +219,7 @@ struct rcu_data {
|
|||
/* Grace period that needs help */
|
||||
/* from cond_resched(). */
|
||||
|
||||
/* 5) __rcu_pending() statistics. */
|
||||
unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */
|
||||
unsigned long n_rp_core_needs_qs;
|
||||
unsigned long n_rp_report_qs;
|
||||
unsigned long n_rp_cb_ready;
|
||||
unsigned long n_rp_cpu_needs_gp;
|
||||
unsigned long n_rp_gp_completed;
|
||||
unsigned long n_rp_gp_started;
|
||||
unsigned long n_rp_nocb_defer_wakeup;
|
||||
unsigned long n_rp_need_nothing;
|
||||
|
||||
/* 6) _rcu_barrier(), OOM callbacks, and expediting. */
|
||||
/* 5) _rcu_barrier(), OOM callbacks, and expediting. */
|
||||
struct rcu_head barrier_head;
|
||||
#ifdef CONFIG_RCU_FAST_NO_HZ
|
||||
struct rcu_head oom_head;
|
||||
|
@ -256,7 +230,7 @@ struct rcu_data {
|
|||
atomic_long_t exp_workdone3; /* # done by others #3. */
|
||||
int exp_dynticks_snap; /* Double-check need for IPI. */
|
||||
|
||||
/* 7) Callback offloading. */
|
||||
/* 6) Callback offloading. */
|
||||
#ifdef CONFIG_RCU_NOCB_CPU
|
||||
struct rcu_head *nocb_head; /* CBs waiting for kthread. */
|
||||
struct rcu_head **nocb_tail;
|
||||
|
@ -283,7 +257,7 @@ struct rcu_data {
|
|||
/* Leader CPU takes GP-end wakeups. */
|
||||
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
|
||||
|
||||
/* 8) RCU CPU stall data. */
|
||||
/* 7) RCU CPU stall data. */
|
||||
unsigned int softirq_snap; /* Snapshot of softirq activity. */
|
||||
/* ->rcu_iw* fields protected by leaf rcu_node ->lock. */
|
||||
struct irq_work rcu_iw; /* Check for non-irq activity. */
|
||||
|
@ -374,10 +348,6 @@ struct rcu_state {
|
|||
/* kthreads, if configured. */
|
||||
unsigned long n_force_qs; /* Number of calls to */
|
||||
/* force_quiescent_state(). */
|
||||
unsigned long n_force_qs_lh; /* ~Number of calls leaving */
|
||||
/* due to lock unavailable. */
|
||||
unsigned long n_force_qs_ngp; /* Number of calls leaving */
|
||||
/* due to no GP active. */
|
||||
unsigned long gp_start; /* Time at which GP started, */
|
||||
/* but in jiffies. */
|
||||
unsigned long gp_activity; /* Time of last GP kthread */
|
||||
|
|
|
@ -28,6 +28,15 @@ static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
|
|||
rcu_seq_start(&rsp->expedited_sequence);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the value that the expedited-grace-period counter will have
|
||||
* at the end of the current grace period.
|
||||
*/
|
||||
static __maybe_unused unsigned long rcu_exp_gp_seq_endval(struct rcu_state *rsp)
|
||||
{
|
||||
return rcu_seq_endval(&rsp->expedited_sequence);
|
||||
}
|
||||
|
||||
/*
|
||||
* Record the end of an expedited grace period.
|
||||
*/
|
||||
|
@ -366,21 +375,30 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
|
|||
int ret;
|
||||
struct rcu_node *rnp;
|
||||
|
||||
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
|
||||
sync_exp_reset_tree(rsp);
|
||||
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
|
||||
rcu_for_each_leaf_node(rsp, rnp) {
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
|
||||
/* Each pass checks a CPU for identity, offline, and idle. */
|
||||
mask_ofl_test = 0;
|
||||
for_each_leaf_node_possible_cpu(rnp, cpu) {
|
||||
for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
|
||||
unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
|
||||
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
|
||||
int snap;
|
||||
|
||||
rdp->exp_dynticks_snap =
|
||||
rcu_dynticks_snap(rdp->dynticks);
|
||||
if (raw_smp_processor_id() == cpu ||
|
||||
rcu_dynticks_in_eqs(rdp->exp_dynticks_snap) ||
|
||||
!(rnp->qsmaskinitnext & rdp->grpmask))
|
||||
mask_ofl_test |= rdp->grpmask;
|
||||
!(rnp->qsmaskinitnext & mask)) {
|
||||
mask_ofl_test |= mask;
|
||||
} else {
|
||||
snap = rcu_dynticks_snap(rdtp);
|
||||
if (rcu_dynticks_in_eqs(snap))
|
||||
mask_ofl_test |= mask;
|
||||
else
|
||||
rdp->exp_dynticks_snap = snap;
|
||||
}
|
||||
}
|
||||
mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
|
||||
|
||||
|
@ -394,7 +412,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
|
|||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
|
||||
/* IPI the remaining CPUs for expedited quiescent state. */
|
||||
for_each_leaf_node_possible_cpu(rnp, cpu) {
|
||||
for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
|
||||
unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
|
||||
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
|
||||
|
@ -417,6 +435,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
|
|||
(rnp->expmask & mask)) {
|
||||
/* Online, so delay for a bit and try again. */
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
|
||||
schedule_timeout_uninterruptible(1);
|
||||
goto retry_ipi;
|
||||
}
|
||||
|
@ -443,6 +462,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
|
|||
struct rcu_node *rnp_root = rcu_get_root(rsp);
|
||||
int ret;
|
||||
|
||||
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("startwait"));
|
||||
jiffies_stall = rcu_jiffies_till_stall_check();
|
||||
jiffies_start = jiffies;
|
||||
|
||||
|
@ -606,7 +626,7 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
|
|||
rew.rew_rsp = rsp;
|
||||
rew.rew_s = s;
|
||||
INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
|
||||
schedule_work(&rew.rew_work);
|
||||
queue_work(rcu_gp_wq, &rew.rew_work);
|
||||
}
|
||||
|
||||
/* Wait for expedited grace period to complete. */
|
||||
|
|
|
@ -180,7 +180,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
|
|||
(rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0);
|
||||
struct task_struct *t = current;
|
||||
|
||||
lockdep_assert_held(&rnp->lock);
|
||||
raw_lockdep_assert_held_rcu_node(rnp);
|
||||
WARN_ON_ONCE(rdp->mynode != rnp);
|
||||
WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1);
|
||||
|
||||
|
@ -560,8 +560,14 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
|
|||
}
|
||||
t = list_entry(rnp->gp_tasks->prev,
|
||||
struct task_struct, rcu_node_entry);
|
||||
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
|
||||
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
|
||||
/*
|
||||
* We could be printing a lot while holding a spinlock.
|
||||
* Avoid triggering hard lockup.
|
||||
*/
|
||||
touch_nmi_watchdog();
|
||||
sched_show_task(t);
|
||||
}
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
|
||||
|
@ -957,14 +963,10 @@ static int rcu_boost(struct rcu_node *rnp)
|
|||
* expedited grace period must boost all blocked tasks, including
|
||||
* those blocking the pre-existing normal grace period.
|
||||
*/
|
||||
if (rnp->exp_tasks != NULL) {
|
||||
if (rnp->exp_tasks != NULL)
|
||||
tb = rnp->exp_tasks;
|
||||
rnp->n_exp_boosts++;
|
||||
} else {
|
||||
else
|
||||
tb = rnp->boost_tasks;
|
||||
rnp->n_normal_boosts++;
|
||||
}
|
||||
rnp->n_tasks_boosted++;
|
||||
|
||||
/*
|
||||
* We boost task t by manufacturing an rt_mutex that appears to
|
||||
|
@ -1042,7 +1044,7 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
|
|||
{
|
||||
struct task_struct *t;
|
||||
|
||||
lockdep_assert_held(&rnp->lock);
|
||||
raw_lockdep_assert_held_rcu_node(rnp);
|
||||
if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
return;
|
||||
|
@ -1677,6 +1679,12 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
|
|||
char *ticks_title;
|
||||
unsigned long ticks_value;
|
||||
|
||||
/*
|
||||
* We could be printing a lot while holding a spinlock. Avoid
|
||||
* triggering hard lockup.
|
||||
*/
|
||||
touch_nmi_watchdog();
|
||||
|
||||
if (rsp->gpnum == rdp->gpnum) {
|
||||
ticks_title = "ticks this GP";
|
||||
ticks_value = rdp->ticks_this_gp;
|
||||
|
@ -2235,7 +2243,6 @@ static int rcu_nocb_kthread(void *arg)
|
|||
smp_mb__before_atomic(); /* _add after CB invocation. */
|
||||
atomic_long_add(-c, &rdp->nocb_q_count);
|
||||
atomic_long_add(-cl, &rdp->nocb_q_count_lazy);
|
||||
rdp->n_nocbs_invoked += c;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -2312,6 +2319,9 @@ void __init rcu_init_nohz(void)
|
|||
cpumask_and(rcu_nocb_mask, cpu_possible_mask,
|
||||
rcu_nocb_mask);
|
||||
}
|
||||
if (cpumask_empty(rcu_nocb_mask))
|
||||
pr_info("\tOffload RCU callbacks from CPUs: (none).\n");
|
||||
else
|
||||
pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n",
|
||||
cpumask_pr_args(rcu_nocb_mask));
|
||||
if (rcu_nocb_poll)
|
||||
|
|
|
@ -113,16 +113,6 @@ config NO_HZ_FULL
|
|||
|
||||
endchoice
|
||||
|
||||
config NO_HZ_FULL_ALL
|
||||
bool "Full dynticks system on all CPUs by default (except CPU 0)"
|
||||
depends on NO_HZ_FULL
|
||||
help
|
||||
If the user doesn't pass the nohz_full boot option to
|
||||
define the range of full dynticks CPUs, consider that all
|
||||
CPUs in the system are full dynticks by default.
|
||||
Note the boot CPU will still be kept outside the range to
|
||||
handle the timekeeping duty.
|
||||
|
||||
config NO_HZ
|
||||
bool "Old Idle dynticks config"
|
||||
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
|
||||
|
|
|
@ -405,30 +405,12 @@ static int tick_nohz_cpu_down(unsigned int cpu)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int tick_nohz_init_all(void)
|
||||
{
|
||||
int err = -1;
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL_ALL
|
||||
if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) {
|
||||
WARN(1, "NO_HZ: Can't allocate full dynticks cpumask\n");
|
||||
return err;
|
||||
}
|
||||
err = 0;
|
||||
cpumask_setall(tick_nohz_full_mask);
|
||||
tick_nohz_full_running = true;
|
||||
#endif
|
||||
return err;
|
||||
}
|
||||
|
||||
void __init tick_nohz_init(void)
|
||||
{
|
||||
int cpu, ret;
|
||||
|
||||
if (!tick_nohz_full_running) {
|
||||
if (tick_nohz_init_all() < 0)
|
||||
if (!tick_nohz_full_running)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Full dynticks uses irq work to drive the tick rescheduling on safe
|
||||
|
|
|
@ -136,6 +136,9 @@ identify_boot_image () {
|
|||
qemu-system-x86_64|qemu-system-i386)
|
||||
echo arch/x86/boot/bzImage
|
||||
;;
|
||||
qemu-system-aarch64)
|
||||
echo arch/arm64/boot/Image
|
||||
;;
|
||||
*)
|
||||
echo vmlinux
|
||||
;;
|
||||
|
@ -158,6 +161,9 @@ identify_qemu () {
|
|||
elif echo $u | grep -q "Intel 80386"
|
||||
then
|
||||
echo qemu-system-i386
|
||||
elif echo $u | grep -q aarch64
|
||||
then
|
||||
echo qemu-system-aarch64
|
||||
elif uname -a | grep -q ppc64
|
||||
then
|
||||
echo qemu-system-ppc64
|
||||
|
@ -176,16 +182,20 @@ identify_qemu () {
|
|||
# Output arguments for the qemu "-append" string based on CPU type
|
||||
# and the TORTURE_QEMU_INTERACTIVE environment variable.
|
||||
identify_qemu_append () {
|
||||
local console=ttyS0
|
||||
case "$1" in
|
||||
qemu-system-x86_64|qemu-system-i386)
|
||||
echo noapic selinux=0 initcall_debug debug
|
||||
;;
|
||||
qemu-system-aarch64)
|
||||
console=ttyAMA0
|
||||
;;
|
||||
esac
|
||||
if test -n "$TORTURE_QEMU_INTERACTIVE"
|
||||
then
|
||||
echo root=/dev/sda
|
||||
else
|
||||
echo console=ttyS0
|
||||
echo console=$console
|
||||
fi
|
||||
}
|
||||
|
||||
|
@ -197,6 +207,9 @@ identify_qemu_args () {
|
|||
case "$1" in
|
||||
qemu-system-x86_64|qemu-system-i386)
|
||||
;;
|
||||
qemu-system-aarch64)
|
||||
echo -machine virt,gic-version=host -cpu host
|
||||
;;
|
||||
qemu-system-ppc64)
|
||||
echo -enable-kvm -M pseries -nodefaults
|
||||
echo -device spapr-vscsi
|
||||
|
@ -254,7 +267,7 @@ specify_qemu_cpus () {
|
|||
echo $2
|
||||
else
|
||||
case "$1" in
|
||||
qemu-system-x86_64|qemu-system-i386)
|
||||
qemu-system-x86_64|qemu-system-i386|qemu-system-aarch64)
|
||||
echo $2 -smp $3
|
||||
;;
|
||||
qemu-system-ppc64)
|
||||
|
|
|
@ -39,30 +39,31 @@ sed -e 's/us : / : /' |
|
|||
tr -d '\015' |
|
||||
awk '
|
||||
$8 == "start" {
|
||||
if (starttask != "")
|
||||
if (startseq != "")
|
||||
nlost++;
|
||||
starttask = $1;
|
||||
starttime = $3;
|
||||
startseq = $7;
|
||||
seqtask[startseq] = starttask;
|
||||
}
|
||||
|
||||
$8 == "end" {
|
||||
if (starttask == $1 && startseq == $7) {
|
||||
if (startseq == $7) {
|
||||
curgpdur = $3 - starttime;
|
||||
gptimes[++n] = curgpdur;
|
||||
gptaskcnt[starttask]++;
|
||||
sum += curgpdur;
|
||||
if (curgpdur > 1000)
|
||||
print "Long GP " starttime "us to " $3 "us (" curgpdur "us)";
|
||||
starttask = "";
|
||||
startseq = "";
|
||||
} else {
|
||||
# Lost a message or some such, reset.
|
||||
starttask = "";
|
||||
startseq = "";
|
||||
nlost++;
|
||||
}
|
||||
}
|
||||
|
||||
$8 == "done" {
|
||||
$8 == "done" && seqtask[$7] != $1 {
|
||||
piggybackcnt[$1]++;
|
||||
}
|
||||
|
||||
|
|
|
@ -177,8 +177,8 @@ then
|
|||
exit 0
|
||||
fi
|
||||
echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
|
||||
echo $QEMU $qemu_args -m 512 -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
|
||||
( $QEMU $qemu_args -m 512 -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
|
||||
echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
|
||||
( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
|
||||
commandcompleted=0
|
||||
sleep 10 # Give qemu's pid a chance to reach the file
|
||||
if test -s "$resdir/qemu_pid"
|
||||
|
|
|
@ -1,10 +1,8 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# Run a series of 14 tests under KVM. These are not particularly
|
||||
# well-selected or well-tuned, but are the current set.
|
||||
#
|
||||
# Edit the definitions below to set the locations of the various directories,
|
||||
# as well as the test duration.
|
||||
# Run a series of tests under KVM. By default, this series is specified
|
||||
# by the relevant CFLIST file, but can be overridden by the --configs
|
||||
# command-line argument.
|
||||
#
|
||||
# Usage: kvm.sh [ options ]
|
||||
#
|
||||
|
@ -44,6 +42,7 @@ TORTURE_BOOT_IMAGE=""
|
|||
TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
|
||||
TORTURE_KCONFIG_ARG=""
|
||||
TORTURE_KMAKE_ARG=""
|
||||
TORTURE_QEMU_MEM=512
|
||||
TORTURE_SHUTDOWN_GRACE=180
|
||||
TORTURE_SUITE=rcu
|
||||
resdir=""
|
||||
|
@ -70,6 +69,7 @@ usage () {
|
|||
echo " --kconfig Kconfig-options"
|
||||
echo " --kmake-arg kernel-make-arguments"
|
||||
echo " --mac nn:nn:nn:nn:nn:nn"
|
||||
echo " --memory megabytes | nnnG"
|
||||
echo " --no-initrd"
|
||||
echo " --qemu-args qemu-arguments"
|
||||
echo " --qemu-cmd qemu-system-..."
|
||||
|
@ -147,6 +147,11 @@ do
|
|||
TORTURE_QEMU_MAC=$2
|
||||
shift
|
||||
;;
|
||||
--memory)
|
||||
checkarg --memory "(memory size)" $# "$2" '^[0-9]\+[MG]\?$' error
|
||||
TORTURE_QEMU_MEM=$2
|
||||
shift
|
||||
;;
|
||||
--no-initrd)
|
||||
TORTURE_INITRD=""; export TORTURE_INITRD
|
||||
;;
|
||||
|
@ -174,6 +179,12 @@ do
|
|||
checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\)$' '^--'
|
||||
TORTURE_SUITE=$2
|
||||
shift
|
||||
if test "$TORTURE_SUITE" = rcuperf
|
||||
then
|
||||
# If you really want jitter for rcuperf, specify
|
||||
# it after specifying rcuperf. (But why?)
|
||||
jitter=0
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
echo Unknown argument $1
|
||||
|
@ -288,6 +299,7 @@ TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
|
|||
TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
|
||||
TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
|
||||
TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
|
||||
TORTURE_QEMU_MEM="$TORTURE_QEMU_MEM"; export TORTURE_QEMU_MEM
|
||||
TORTURE_SHUTDOWN_GRACE="$TORTURE_SHUTDOWN_GRACE"; export TORTURE_SHUTDOWN_GRACE
|
||||
TORTURE_SUITE="$TORTURE_SUITE"; export TORTURE_SUITE
|
||||
if ! test -e $resdir
|
||||
|
|
|
@ -9,5 +9,4 @@ CONFIG_PREEMPT=y
|
|||
CONFIG_HZ_PERIODIC=n
|
||||
CONFIG_NO_HZ_IDLE=n
|
||||
CONFIG_NO_HZ_FULL=y
|
||||
CONFIG_NO_HZ_FULL_ALL=y
|
||||
#CHECK#CONFIG_RCU_EXPERT=n
|
||||
|
|
|
@ -1 +1 @@
|
|||
rcutorture.torture_type=tasks
|
||||
rcutorture.torture_type=tasks nohz_full=1
|
||||
|
|
|
@ -7,7 +7,6 @@ CONFIG_PREEMPT=n
|
|||
CONFIG_HZ_PERIODIC=n
|
||||
CONFIG_NO_HZ_IDLE=n
|
||||
CONFIG_NO_HZ_FULL=y
|
||||
CONFIG_NO_HZ_FULL_ALL=y
|
||||
CONFIG_RCU_FAST_NO_HZ=y
|
||||
CONFIG_RCU_TRACE=y
|
||||
CONFIG_HOTPLUG_CPU=n
|
||||
|
|
|
@ -1 +1 @@
|
|||
rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4
|
||||
rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4 nohz_full=1-7
|
||||
|
|
|
@ -7,7 +7,6 @@ CONFIG_PREEMPT=n
|
|||
CONFIG_HZ_PERIODIC=n
|
||||
CONFIG_NO_HZ_IDLE=n
|
||||
CONFIG_NO_HZ_FULL=y
|
||||
CONFIG_NO_HZ_FULL_ALL=n
|
||||
CONFIG_RCU_FAST_NO_HZ=n
|
||||
CONFIG_RCU_TRACE=y
|
||||
CONFIG_HOTPLUG_CPU=y
|
||||
|
|
|
@ -20,32 +20,10 @@
|
|||
#
|
||||
# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
|
||||
|
||||
# rcuperf_param_nreaders bootparam-string
|
||||
#
|
||||
# Adds nreaders rcuperf module parameter if not already specified.
|
||||
rcuperf_param_nreaders () {
|
||||
if ! echo "$1" | grep -q "rcuperf.nreaders"
|
||||
then
|
||||
echo rcuperf.nreaders=-1
|
||||
fi
|
||||
}
|
||||
|
||||
# rcuperf_param_nwriters bootparam-string
|
||||
#
|
||||
# Adds nwriters rcuperf module parameter if not already specified.
|
||||
rcuperf_param_nwriters () {
|
||||
if ! echo "$1" | grep -q "rcuperf.nwriters"
|
||||
then
|
||||
echo rcuperf.nwriters=-1
|
||||
fi
|
||||
}
|
||||
|
||||
# per_version_boot_params bootparam-string config-file seconds
|
||||
#
|
||||
# Adds per-version torture-module parameters to kernels supporting them.
|
||||
per_version_boot_params () {
|
||||
echo $1 `rcuperf_param_nreaders "$1"` \
|
||||
`rcuperf_param_nwriters "$1"` \
|
||||
rcuperf.shutdown=1 \
|
||||
echo $1 rcuperf.shutdown=1 \
|
||||
rcuperf.verbose=1
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
This document describes one way to created the rcu-test-image file
|
||||
This document describes one way to create the rcu-test-image file
|
||||
that contains the filesystem used by the guest-OS kernel. There are
|
||||
probably much better ways of doing this, and this filesystem could no
|
||||
doubt be smaller. It is probably also possible to simply download
|
||||
|
|
Loading…
Reference in a new issue