rcu: Control RCU_FANOUT_LEAF from boot-time parameter
Although making RCU_FANOUT_LEAF a kernel configuration parameter rather than a fixed constant makes it easier for people to decrease cache-miss overhead for large systems, it is of little help for people who must run a single pre-built kernel binary. This commit therefore allows the value of RCU_FANOUT_LEAF to be increased (but not decreased!) via a boot-time parameter named rcutree.rcu_fanout_leaf. Reported-by: Mike Galbraith <efault@gmx.de> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
This commit is contained in:
parent
cba6d0d64e
commit
f885b7f2b2
5 changed files with 104 additions and 25 deletions
|
@ -2367,6 +2367,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
|||
Set maximum number of finished RCU callbacks to process
|
||||
in one batch.
|
||||
|
||||
rcutree.fanout_leaf= [KNL,BOOT]
|
||||
Increase the number of CPUs assigned to each
|
||||
leaf rcu_node structure. Useful for very large
|
||||
systems.
|
||||
|
||||
rcutree.qhimark= [KNL,BOOT]
|
||||
Set threshold of queued
|
||||
RCU callbacks over which batch limiting is disabled.
|
||||
|
|
|
@ -60,17 +60,10 @@
|
|||
|
||||
/* Data structures. */
|
||||
|
||||
static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
|
||||
static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
|
||||
|
||||
#define RCU_STATE_INITIALIZER(structname) { \
|
||||
.level = { &structname##_state.node[0] }, \
|
||||
.levelcnt = { \
|
||||
NUM_RCU_LVL_0, /* root of hierarchy. */ \
|
||||
NUM_RCU_LVL_1, \
|
||||
NUM_RCU_LVL_2, \
|
||||
NUM_RCU_LVL_3, \
|
||||
NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
|
||||
}, \
|
||||
.fqs_state = RCU_GP_IDLE, \
|
||||
.gpnum = -300, \
|
||||
.completed = -300, \
|
||||
|
@ -91,6 +84,19 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
|
|||
|
||||
static struct rcu_state *rcu_state;
|
||||
|
||||
/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
|
||||
static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
|
||||
module_param(rcu_fanout_leaf, int, 0);
|
||||
int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
|
||||
static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */
|
||||
NUM_RCU_LVL_0,
|
||||
NUM_RCU_LVL_1,
|
||||
NUM_RCU_LVL_2,
|
||||
NUM_RCU_LVL_3,
|
||||
NUM_RCU_LVL_4,
|
||||
};
|
||||
int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
|
||||
|
||||
/*
|
||||
* The rcu_scheduler_active variable transitions from zero to one just
|
||||
* before the first task is spawned. So when this variable is zero, RCU
|
||||
|
@ -2574,9 +2580,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
|
|||
{
|
||||
int i;
|
||||
|
||||
for (i = NUM_RCU_LVLS - 1; i > 0; i--)
|
||||
for (i = rcu_num_lvls - 1; i > 0; i--)
|
||||
rsp->levelspread[i] = CONFIG_RCU_FANOUT;
|
||||
rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF;
|
||||
rsp->levelspread[0] = rcu_fanout_leaf;
|
||||
}
|
||||
#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
|
||||
static void __init rcu_init_levelspread(struct rcu_state *rsp)
|
||||
|
@ -2586,7 +2592,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
|
|||
int i;
|
||||
|
||||
cprv = NR_CPUS;
|
||||
for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
|
||||
for (i = rcu_num_lvls - 1; i >= 0; i--) {
|
||||
ccur = rsp->levelcnt[i];
|
||||
rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
|
||||
cprv = ccur;
|
||||
|
@ -2613,13 +2619,15 @@ static void __init rcu_init_one(struct rcu_state *rsp,
|
|||
|
||||
/* Initialize the level-tracking arrays. */
|
||||
|
||||
for (i = 1; i < NUM_RCU_LVLS; i++)
|
||||
for (i = 0; i < rcu_num_lvls; i++)
|
||||
rsp->levelcnt[i] = num_rcu_lvl[i];
|
||||
for (i = 1; i < rcu_num_lvls; i++)
|
||||
rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
|
||||
rcu_init_levelspread(rsp);
|
||||
|
||||
/* Initialize the elements themselves, starting from the leaves. */
|
||||
|
||||
for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
|
||||
for (i = rcu_num_lvls - 1; i >= 0; i--) {
|
||||
cpustride *= rsp->levelspread[i];
|
||||
rnp = rsp->level[i];
|
||||
for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
|
||||
|
@ -2649,7 +2657,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
|
|||
}
|
||||
|
||||
rsp->rda = rda;
|
||||
rnp = rsp->level[NUM_RCU_LVLS - 1];
|
||||
rnp = rsp->level[rcu_num_lvls - 1];
|
||||
for_each_possible_cpu(i) {
|
||||
while (i > rnp->grphi)
|
||||
rnp++;
|
||||
|
@ -2658,11 +2666,72 @@ static void __init rcu_init_one(struct rcu_state *rsp,
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute the rcu_node tree geometry from kernel parameters. This cannot
|
||||
* replace the definitions in rcutree.h because those are needed to size
|
||||
* the ->node array in the rcu_state structure.
|
||||
*/
|
||||
static void __init rcu_init_geometry(void)
|
||||
{
|
||||
int i;
|
||||
int j;
|
||||
int n = NR_CPUS;
|
||||
int rcu_capacity[MAX_RCU_LVLS + 1];
|
||||
|
||||
/* If the compile-time values are accurate, just leave. */
|
||||
if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Compute number of nodes that can be handled an rcu_node tree
|
||||
* with the given number of levels. Setting rcu_capacity[0] makes
|
||||
* some of the arithmetic easier.
|
||||
*/
|
||||
rcu_capacity[0] = 1;
|
||||
rcu_capacity[1] = rcu_fanout_leaf;
|
||||
for (i = 2; i <= MAX_RCU_LVLS; i++)
|
||||
rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;
|
||||
|
||||
/*
|
||||
* The boot-time rcu_fanout_leaf parameter is only permitted
|
||||
* to increase the leaf-level fanout, not decrease it. Of course,
|
||||
* the leaf-level fanout cannot exceed the number of bits in
|
||||
* the rcu_node masks. Finally, the tree must be able to accommodate
|
||||
* the configured number of CPUs. Complain and fall back to the
|
||||
* compile-time values if these limits are exceeded.
|
||||
*/
|
||||
if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF ||
|
||||
rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
|
||||
n > rcu_capacity[MAX_RCU_LVLS]) {
|
||||
WARN_ON(1);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Calculate the number of rcu_nodes at each level of the tree. */
|
||||
for (i = 1; i <= MAX_RCU_LVLS; i++)
|
||||
if (n <= rcu_capacity[i]) {
|
||||
for (j = 0; j <= i; j++)
|
||||
num_rcu_lvl[j] =
|
||||
DIV_ROUND_UP(n, rcu_capacity[i - j]);
|
||||
rcu_num_lvls = i;
|
||||
for (j = i + 1; j <= MAX_RCU_LVLS; j++)
|
||||
num_rcu_lvl[j] = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Calculate the total number of rcu_node structures. */
|
||||
rcu_num_nodes = 0;
|
||||
for (i = 0; i <= MAX_RCU_LVLS; i++)
|
||||
rcu_num_nodes += num_rcu_lvl[i];
|
||||
rcu_num_nodes -= n;
|
||||
}
|
||||
|
||||
void __init rcu_init(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
rcu_bootup_announce();
|
||||
rcu_init_geometry();
|
||||
rcu_init_one(&rcu_sched_state, &rcu_sched_data);
|
||||
rcu_init_one(&rcu_bh_state, &rcu_bh_data);
|
||||
__rcu_init_preempt();
|
||||
|
|
|
@ -42,28 +42,28 @@
|
|||
#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
|
||||
|
||||
#if NR_CPUS <= RCU_FANOUT_1
|
||||
# define NUM_RCU_LVLS 1
|
||||
# define RCU_NUM_LVLS 1
|
||||
# define NUM_RCU_LVL_0 1
|
||||
# define NUM_RCU_LVL_1 (NR_CPUS)
|
||||
# define NUM_RCU_LVL_2 0
|
||||
# define NUM_RCU_LVL_3 0
|
||||
# define NUM_RCU_LVL_4 0
|
||||
#elif NR_CPUS <= RCU_FANOUT_2
|
||||
# define NUM_RCU_LVLS 2
|
||||
# define RCU_NUM_LVLS 2
|
||||
# define NUM_RCU_LVL_0 1
|
||||
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
|
||||
# define NUM_RCU_LVL_2 (NR_CPUS)
|
||||
# define NUM_RCU_LVL_3 0
|
||||
# define NUM_RCU_LVL_4 0
|
||||
#elif NR_CPUS <= RCU_FANOUT_3
|
||||
# define NUM_RCU_LVLS 3
|
||||
# define RCU_NUM_LVLS 3
|
||||
# define NUM_RCU_LVL_0 1
|
||||
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
|
||||
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
|
||||
# define NUM_RCU_LVL_3 (NR_CPUS)
|
||||
# define NUM_RCU_LVL_4 0
|
||||
#elif NR_CPUS <= RCU_FANOUT_4
|
||||
# define NUM_RCU_LVLS 4
|
||||
# define RCU_NUM_LVLS 4
|
||||
# define NUM_RCU_LVL_0 1
|
||||
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
|
||||
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
|
||||
|
@ -76,6 +76,9 @@
|
|||
#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
|
||||
#define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
|
||||
|
||||
extern int rcu_num_lvls;
|
||||
extern int rcu_num_nodes;
|
||||
|
||||
/*
|
||||
* Dynticks per-CPU state.
|
||||
*/
|
||||
|
@ -206,7 +209,7 @@ struct rcu_node {
|
|||
*/
|
||||
#define rcu_for_each_node_breadth_first(rsp, rnp) \
|
||||
for ((rnp) = &(rsp)->node[0]; \
|
||||
(rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
|
||||
(rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
|
||||
|
||||
/*
|
||||
* Do a breadth-first scan of the non-leaf rcu_node structures for the
|
||||
|
@ -215,7 +218,7 @@ struct rcu_node {
|
|||
*/
|
||||
#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
|
||||
for ((rnp) = &(rsp)->node[0]; \
|
||||
(rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++)
|
||||
(rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
|
||||
|
||||
/*
|
||||
* Scan the leaves of the rcu_node hierarchy for the specified rcu_state
|
||||
|
@ -224,8 +227,8 @@ struct rcu_node {
|
|||
* It is still a leaf node, even if it is also the root node.
|
||||
*/
|
||||
#define rcu_for_each_leaf_node(rsp, rnp) \
|
||||
for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \
|
||||
(rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
|
||||
for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
|
||||
(rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
|
||||
|
||||
/* Index values for nxttail array in struct rcu_data. */
|
||||
#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */
|
||||
|
@ -357,9 +360,9 @@ do { \
|
|||
*/
|
||||
struct rcu_state {
|
||||
struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */
|
||||
struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */
|
||||
struct rcu_node *level[RCU_NUM_LVLS]; /* Hierarchy levels. */
|
||||
u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */
|
||||
u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */
|
||||
u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */
|
||||
struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */
|
||||
|
||||
/* The following fields are guarded by the root rcu_node's lock. */
|
||||
|
|
|
@ -70,6 +70,8 @@ static void __init rcu_bootup_announce_oddness(void)
|
|||
#if NUM_RCU_LVL_4 != 0
|
||||
printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
|
||||
#endif
|
||||
if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
|
||||
printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_TREE_PREEMPT_RCU
|
||||
|
|
|
@ -278,7 +278,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
|
|||
rsp->n_force_qs, rsp->n_force_qs_ngp,
|
||||
rsp->n_force_qs - rsp->n_force_qs_ngp,
|
||||
rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen);
|
||||
for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) {
|
||||
for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
|
||||
if (rnp->level != level) {
|
||||
seq_puts(m, "\n");
|
||||
level = rnp->level;
|
||||
|
|
Loading…
Reference in a new issue