rcu: Control RCU_FANOUT_LEAF from boot-time parameter

Although making RCU_FANOUT_LEAF a kernel configuration parameter rather
than a fixed constant makes it easier for people to decrease cache-miss
overhead for large systems, it is of little help for people who must
run a single pre-built kernel binary.

This commit therefore allows the value of RCU_FANOUT_LEAF to be
increased (but not decreased!) via a boot-time parameter named
rcutree.rcu_fanout_leaf.

Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
This commit is contained in:
Paul E. McKenney 2012-04-23 15:52:53 -07:00
parent cba6d0d64e
commit f885b7f2b2
5 changed files with 104 additions and 25 deletions

View file

@ -2367,6 +2367,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
Set maximum number of finished RCU callbacks to process Set maximum number of finished RCU callbacks to process
in one batch. in one batch.
rcutree.fanout_leaf= [KNL,BOOT]
Increase the number of CPUs assigned to each
leaf rcu_node structure. Useful for very large
systems.
rcutree.qhimark= [KNL,BOOT] rcutree.qhimark= [KNL,BOOT]
Set threshold of queued Set threshold of queued
RCU callbacks over which batch limiting is disabled. RCU callbacks over which batch limiting is disabled.

View file

@ -60,17 +60,10 @@
/* Data structures. */ /* Data structures. */
static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
#define RCU_STATE_INITIALIZER(structname) { \ #define RCU_STATE_INITIALIZER(structname) { \
.level = { &structname##_state.node[0] }, \ .level = { &structname##_state.node[0] }, \
.levelcnt = { \
NUM_RCU_LVL_0, /* root of hierarchy. */ \
NUM_RCU_LVL_1, \
NUM_RCU_LVL_2, \
NUM_RCU_LVL_3, \
NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
}, \
.fqs_state = RCU_GP_IDLE, \ .fqs_state = RCU_GP_IDLE, \
.gpnum = -300, \ .gpnum = -300, \
.completed = -300, \ .completed = -300, \
@ -91,6 +84,19 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
static struct rcu_state *rcu_state; static struct rcu_state *rcu_state;
/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
module_param(rcu_fanout_leaf, int, 0);
int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */
NUM_RCU_LVL_0,
NUM_RCU_LVL_1,
NUM_RCU_LVL_2,
NUM_RCU_LVL_3,
NUM_RCU_LVL_4,
};
int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
/* /*
* The rcu_scheduler_active variable transitions from zero to one just * The rcu_scheduler_active variable transitions from zero to one just
* before the first task is spawned. So when this variable is zero, RCU * before the first task is spawned. So when this variable is zero, RCU
@ -2574,9 +2580,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
{ {
int i; int i;
for (i = NUM_RCU_LVLS - 1; i > 0; i--) for (i = rcu_num_lvls - 1; i > 0; i--)
rsp->levelspread[i] = CONFIG_RCU_FANOUT; rsp->levelspread[i] = CONFIG_RCU_FANOUT;
rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF; rsp->levelspread[0] = rcu_fanout_leaf;
} }
#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
static void __init rcu_init_levelspread(struct rcu_state *rsp) static void __init rcu_init_levelspread(struct rcu_state *rsp)
@ -2586,7 +2592,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
int i; int i;
cprv = NR_CPUS; cprv = NR_CPUS;
for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { for (i = rcu_num_lvls - 1; i >= 0; i--) {
ccur = rsp->levelcnt[i]; ccur = rsp->levelcnt[i];
rsp->levelspread[i] = (cprv + ccur - 1) / ccur; rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
cprv = ccur; cprv = ccur;
@ -2613,13 +2619,15 @@ static void __init rcu_init_one(struct rcu_state *rsp,
/* Initialize the level-tracking arrays. */ /* Initialize the level-tracking arrays. */
for (i = 1; i < NUM_RCU_LVLS; i++) for (i = 0; i < rcu_num_lvls; i++)
rsp->levelcnt[i] = num_rcu_lvl[i];
for (i = 1; i < rcu_num_lvls; i++)
rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
rcu_init_levelspread(rsp); rcu_init_levelspread(rsp);
/* Initialize the elements themselves, starting from the leaves. */ /* Initialize the elements themselves, starting from the leaves. */
for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { for (i = rcu_num_lvls - 1; i >= 0; i--) {
cpustride *= rsp->levelspread[i]; cpustride *= rsp->levelspread[i];
rnp = rsp->level[i]; rnp = rsp->level[i];
for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
@ -2649,7 +2657,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
} }
rsp->rda = rda; rsp->rda = rda;
rnp = rsp->level[NUM_RCU_LVLS - 1]; rnp = rsp->level[rcu_num_lvls - 1];
for_each_possible_cpu(i) { for_each_possible_cpu(i) {
while (i > rnp->grphi) while (i > rnp->grphi)
rnp++; rnp++;
@ -2658,11 +2666,72 @@ static void __init rcu_init_one(struct rcu_state *rsp,
} }
} }
/*
* Compute the rcu_node tree geometry from kernel parameters. This cannot
* replace the definitions in rcutree.h because those are needed to size
* the ->node array in the rcu_state structure.
*/
static void __init rcu_init_geometry(void)
{
int i;
int j;
int n = NR_CPUS;
int rcu_capacity[MAX_RCU_LVLS + 1];
/* If the compile-time values are accurate, just leave. */
if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF)
return;
/*
* Compute number of nodes that can be handled an rcu_node tree
* with the given number of levels. Setting rcu_capacity[0] makes
* some of the arithmetic easier.
*/
rcu_capacity[0] = 1;
rcu_capacity[1] = rcu_fanout_leaf;
for (i = 2; i <= MAX_RCU_LVLS; i++)
rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;
/*
* The boot-time rcu_fanout_leaf parameter is only permitted
* to increase the leaf-level fanout, not decrease it. Of course,
* the leaf-level fanout cannot exceed the number of bits in
* the rcu_node masks. Finally, the tree must be able to accommodate
* the configured number of CPUs. Complain and fall back to the
* compile-time values if these limits are exceeded.
*/
if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF ||
rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
n > rcu_capacity[MAX_RCU_LVLS]) {
WARN_ON(1);
return;
}
/* Calculate the number of rcu_nodes at each level of the tree. */
for (i = 1; i <= MAX_RCU_LVLS; i++)
if (n <= rcu_capacity[i]) {
for (j = 0; j <= i; j++)
num_rcu_lvl[j] =
DIV_ROUND_UP(n, rcu_capacity[i - j]);
rcu_num_lvls = i;
for (j = i + 1; j <= MAX_RCU_LVLS; j++)
num_rcu_lvl[j] = 0;
break;
}
/* Calculate the total number of rcu_node structures. */
rcu_num_nodes = 0;
for (i = 0; i <= MAX_RCU_LVLS; i++)
rcu_num_nodes += num_rcu_lvl[i];
rcu_num_nodes -= n;
}
void __init rcu_init(void) void __init rcu_init(void)
{ {
int cpu; int cpu;
rcu_bootup_announce(); rcu_bootup_announce();
rcu_init_geometry();
rcu_init_one(&rcu_sched_state, &rcu_sched_data); rcu_init_one(&rcu_sched_state, &rcu_sched_data);
rcu_init_one(&rcu_bh_state, &rcu_bh_data); rcu_init_one(&rcu_bh_state, &rcu_bh_data);
__rcu_init_preempt(); __rcu_init_preempt();

View file

@ -42,28 +42,28 @@
#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
#if NR_CPUS <= RCU_FANOUT_1 #if NR_CPUS <= RCU_FANOUT_1
# define NUM_RCU_LVLS 1 # define RCU_NUM_LVLS 1
# define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 (NR_CPUS) # define NUM_RCU_LVL_1 (NR_CPUS)
# define NUM_RCU_LVL_2 0 # define NUM_RCU_LVL_2 0
# define NUM_RCU_LVL_3 0 # define NUM_RCU_LVL_3 0
# define NUM_RCU_LVL_4 0 # define NUM_RCU_LVL_4 0
#elif NR_CPUS <= RCU_FANOUT_2 #elif NR_CPUS <= RCU_FANOUT_2
# define NUM_RCU_LVLS 2 # define RCU_NUM_LVLS 2
# define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
# define NUM_RCU_LVL_2 (NR_CPUS) # define NUM_RCU_LVL_2 (NR_CPUS)
# define NUM_RCU_LVL_3 0 # define NUM_RCU_LVL_3 0
# define NUM_RCU_LVL_4 0 # define NUM_RCU_LVL_4 0
#elif NR_CPUS <= RCU_FANOUT_3 #elif NR_CPUS <= RCU_FANOUT_3
# define NUM_RCU_LVLS 3 # define RCU_NUM_LVLS 3
# define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
# define NUM_RCU_LVL_3 (NR_CPUS) # define NUM_RCU_LVL_3 (NR_CPUS)
# define NUM_RCU_LVL_4 0 # define NUM_RCU_LVL_4 0
#elif NR_CPUS <= RCU_FANOUT_4 #elif NR_CPUS <= RCU_FANOUT_4
# define NUM_RCU_LVLS 4 # define RCU_NUM_LVLS 4
# define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
@ -76,6 +76,9 @@
#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
#define NUM_RCU_NODES (RCU_SUM - NR_CPUS) #define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
extern int rcu_num_lvls;
extern int rcu_num_nodes;
/* /*
* Dynticks per-CPU state. * Dynticks per-CPU state.
*/ */
@ -206,7 +209,7 @@ struct rcu_node {
*/ */
#define rcu_for_each_node_breadth_first(rsp, rnp) \ #define rcu_for_each_node_breadth_first(rsp, rnp) \
for ((rnp) = &(rsp)->node[0]; \ for ((rnp) = &(rsp)->node[0]; \
(rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
/* /*
* Do a breadth-first scan of the non-leaf rcu_node structures for the * Do a breadth-first scan of the non-leaf rcu_node structures for the
@ -215,7 +218,7 @@ struct rcu_node {
*/ */
#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
for ((rnp) = &(rsp)->node[0]; \ for ((rnp) = &(rsp)->node[0]; \
(rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++) (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
/* /*
* Scan the leaves of the rcu_node hierarchy for the specified rcu_state * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
@ -224,8 +227,8 @@ struct rcu_node {
* It is still a leaf node, even if it is also the root node. * It is still a leaf node, even if it is also the root node.
*/ */
#define rcu_for_each_leaf_node(rsp, rnp) \ #define rcu_for_each_leaf_node(rsp, rnp) \
for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
(rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
/* Index values for nxttail array in struct rcu_data. */ /* Index values for nxttail array in struct rcu_data. */
#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */
@ -357,9 +360,9 @@ do { \
*/ */
struct rcu_state { struct rcu_state {
struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */
struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ struct rcu_node *level[RCU_NUM_LVLS]; /* Hierarchy levels. */
u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */
u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */
struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */
/* The following fields are guarded by the root rcu_node's lock. */ /* The following fields are guarded by the root rcu_node's lock. */

View file

@ -70,6 +70,8 @@ static void __init rcu_bootup_announce_oddness(void)
#if NUM_RCU_LVL_4 != 0 #if NUM_RCU_LVL_4 != 0
printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
#endif #endif
if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
} }
#ifdef CONFIG_TREE_PREEMPT_RCU #ifdef CONFIG_TREE_PREEMPT_RCU

View file

@ -278,7 +278,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
rsp->n_force_qs, rsp->n_force_qs_ngp, rsp->n_force_qs, rsp->n_force_qs_ngp,
rsp->n_force_qs - rsp->n_force_qs_ngp, rsp->n_force_qs - rsp->n_force_qs_ngp,
rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen); rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen);
for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
if (rnp->level != level) { if (rnp->level != level) {
seq_puts(m, "\n"); seq_puts(m, "\n");
level = rnp->level; level = rnp->level;