memcg: cpu hotplug aware percpu count updates
Now, memcgroup's per cpu counter uses for_each_possible_cpu() to get the value. It's better to use for_each_online_cpu() and a cpu hotplug handler. This patch only handles statistics counter. MEM_CGROUP_ON_MOVE will be handled in another patch. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
7d74b06f24
commit
711d3d2c9b
1 changed files with 93 additions and 9 deletions
|
@ -89,7 +89,9 @@ enum mem_cgroup_stat_index {
|
||||||
MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */
|
MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */
|
||||||
MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */
|
MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */
|
||||||
MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
|
MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
|
||||||
MEM_CGROUP_EVENTS, /* incremented at every pagein/pageout */
|
MEM_CGROUP_STAT_DATA, /* end of data requires synchronization */
|
||||||
|
/* incremented at every pagein/pageout */
|
||||||
|
MEM_CGROUP_EVENTS = MEM_CGROUP_STAT_DATA,
|
||||||
MEM_CGROUP_ON_MOVE, /* someone is moving account between groups */
|
MEM_CGROUP_ON_MOVE, /* someone is moving account between groups */
|
||||||
|
|
||||||
MEM_CGROUP_STAT_NSTATS,
|
MEM_CGROUP_STAT_NSTATS,
|
||||||
|
@ -255,6 +257,12 @@ struct mem_cgroup {
|
||||||
* percpu counter.
|
* percpu counter.
|
||||||
*/
|
*/
|
||||||
struct mem_cgroup_stat_cpu *stat;
|
struct mem_cgroup_stat_cpu *stat;
|
||||||
|
/*
|
||||||
|
* used when a cpu is offlined or other synchronizations
|
||||||
|
* See mem_cgroup_read_stat().
|
||||||
|
*/
|
||||||
|
struct mem_cgroup_stat_cpu nocpu_base;
|
||||||
|
spinlock_t pcp_counter_lock;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Stuffs for move charges at task migration. */
|
/* Stuffs for move charges at task migration. */
|
||||||
|
@ -531,14 +539,40 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
|
||||||
return mz;
|
return mz;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Implementation Note: reading percpu statistics for memcg.
|
||||||
|
*
|
||||||
|
* Both vmstat[] and percpu_counter have a threshold and do periodic
|
||||||
|
* synchronization to implement "quick" read. There is a trade-off between
|
||||||
|
* reading cost and precision of value. Then, we may have a chance to implement
|
||||||
|
* a periodic synchronization of counter in memcg's counter.
|
||||||
|
*
|
||||||
|
* But this _read() function is used for user interface now. The user accounts
|
||||||
|
* memory usage by memory cgroup and he _always_ requires exact value because
|
||||||
|
* he accounts memory. Even if we provide quick-and-fuzzy read, we always
|
||||||
|
* have to visit all online cpus and make sum. So, for now, unnecessary
|
||||||
|
* synchronization is not implemented. (just implemented for cpu hotplug)
|
||||||
|
*
|
||||||
|
* If there are kernel internal actions which can make use of some not-exact
|
||||||
|
* value, and reading all cpu value can be performance bottleneck in some
|
||||||
|
* common workload, threshold and synchronization as vmstat[] should be
|
||||||
|
* implemented.
|
||||||
|
*/
|
||||||
static s64 mem_cgroup_read_stat(struct mem_cgroup *mem,
|
static s64 mem_cgroup_read_stat(struct mem_cgroup *mem,
|
||||||
enum mem_cgroup_stat_index idx)
|
enum mem_cgroup_stat_index idx)
|
||||||
{
|
{
|
||||||
int cpu;
|
int cpu;
|
||||||
s64 val = 0;
|
s64 val = 0;
|
||||||
|
|
||||||
for_each_possible_cpu(cpu)
|
get_online_cpus();
|
||||||
|
for_each_online_cpu(cpu)
|
||||||
val += per_cpu(mem->stat->count[idx], cpu);
|
val += per_cpu(mem->stat->count[idx], cpu);
|
||||||
|
#ifdef CONFIG_HOTPLUG_CPU
|
||||||
|
spin_lock(&mem->pcp_counter_lock);
|
||||||
|
val += mem->nocpu_base.count[idx];
|
||||||
|
spin_unlock(&mem->pcp_counter_lock);
|
||||||
|
#endif
|
||||||
|
put_online_cpus();
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -663,9 +697,28 @@ static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
|
||||||
/* The caller has to guarantee "mem" exists before calling this */
|
/* The caller has to guarantee "mem" exists before calling this */
|
||||||
static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem)
|
static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem)
|
||||||
{
|
{
|
||||||
if (mem && css_tryget(&mem->css))
|
struct cgroup_subsys_state *css;
|
||||||
|
int found;
|
||||||
|
|
||||||
|
if (!mem) /* ROOT cgroup has the smallest ID */
|
||||||
|
return root_mem_cgroup; /*css_put/get against root is ignored*/
|
||||||
|
if (!mem->use_hierarchy) {
|
||||||
|
if (css_tryget(&mem->css))
|
||||||
return mem;
|
return mem;
|
||||||
return NULL;
|
return NULL;
|
||||||
|
}
|
||||||
|
rcu_read_lock();
|
||||||
|
/*
|
||||||
|
* searching a memory cgroup which has the smallest ID under given
|
||||||
|
* ROOT cgroup. (ID >= 1)
|
||||||
|
*/
|
||||||
|
css = css_get_next(&mem_cgroup_subsys, 1, &mem->css, &found);
|
||||||
|
if (css && css_tryget(css))
|
||||||
|
mem = container_of(css, struct mem_cgroup, css);
|
||||||
|
else
|
||||||
|
mem = NULL;
|
||||||
|
rcu_read_unlock();
|
||||||
|
return mem;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
|
static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
|
||||||
|
@ -680,9 +733,13 @@ static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
|
||||||
hierarchy_used = iter->use_hierarchy;
|
hierarchy_used = iter->use_hierarchy;
|
||||||
|
|
||||||
css_put(&iter->css);
|
css_put(&iter->css);
|
||||||
if (!cond || !hierarchy_used)
|
/* If no ROOT, walk all, ignore hierarchy */
|
||||||
|
if (!cond || (root && !hierarchy_used))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
if (!root)
|
||||||
|
root = root_mem_cgroup;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
iter = NULL;
|
iter = NULL;
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
@ -711,6 +768,9 @@ static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
|
||||||
#define for_each_mem_cgroup_tree(iter, root) \
|
#define for_each_mem_cgroup_tree(iter, root) \
|
||||||
for_each_mem_cgroup_tree_cond(iter, root, true)
|
for_each_mem_cgroup_tree_cond(iter, root, true)
|
||||||
|
|
||||||
|
#define for_each_mem_cgroup_all(iter) \
|
||||||
|
for_each_mem_cgroup_tree_cond(iter, NULL, true)
|
||||||
|
|
||||||
|
|
||||||
static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
|
static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
|
||||||
{
|
{
|
||||||
|
@ -1676,15 +1736,38 @@ static void drain_all_stock_sync(void)
|
||||||
atomic_dec(&memcg_drain_count);
|
atomic_dec(&memcg_drain_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __cpuinit memcg_stock_cpu_callback(struct notifier_block *nb,
|
/*
|
||||||
|
* This function drains percpu counter value from DEAD cpu and
|
||||||
|
* moves it to mem->nocpu_base. Note that this function can be preempted.
|
||||||
|
*/
|
||||||
|
static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *mem, int cpu)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
spin_lock(&mem->pcp_counter_lock);
|
||||||
|
for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) {
|
||||||
|
s64 x = per_cpu(mem->stat->count[i], cpu);
|
||||||
|
|
||||||
|
per_cpu(mem->stat->count[i], cpu) = 0;
|
||||||
|
mem->nocpu_base.count[i] += x;
|
||||||
|
}
|
||||||
|
spin_unlock(&mem->pcp_counter_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb,
|
||||||
unsigned long action,
|
unsigned long action,
|
||||||
void *hcpu)
|
void *hcpu)
|
||||||
{
|
{
|
||||||
int cpu = (unsigned long)hcpu;
|
int cpu = (unsigned long)hcpu;
|
||||||
struct memcg_stock_pcp *stock;
|
struct memcg_stock_pcp *stock;
|
||||||
|
struct mem_cgroup *iter;
|
||||||
|
|
||||||
if (action != CPU_DEAD)
|
if ((action != CPU_DEAD) || action != CPU_DEAD_FROZEN)
|
||||||
return NOTIFY_OK;
|
return NOTIFY_OK;
|
||||||
|
|
||||||
|
for_each_mem_cgroup_all(iter)
|
||||||
|
mem_cgroup_drain_pcp_counter(iter, cpu);
|
||||||
|
|
||||||
stock = &per_cpu(memcg_stock, cpu);
|
stock = &per_cpu(memcg_stock, cpu);
|
||||||
drain_stock(stock);
|
drain_stock(stock);
|
||||||
return NOTIFY_OK;
|
return NOTIFY_OK;
|
||||||
|
@ -4098,6 +4181,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
|
||||||
vfree(mem);
|
vfree(mem);
|
||||||
mem = NULL;
|
mem = NULL;
|
||||||
}
|
}
|
||||||
|
spin_lock_init(&mem->pcp_counter_lock);
|
||||||
return mem;
|
return mem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4224,7 +4308,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
|
||||||
&per_cpu(memcg_stock, cpu);
|
&per_cpu(memcg_stock, cpu);
|
||||||
INIT_WORK(&stock->work, drain_local_stock);
|
INIT_WORK(&stock->work, drain_local_stock);
|
||||||
}
|
}
|
||||||
hotcpu_notifier(memcg_stock_cpu_callback, 0);
|
hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
|
||||||
} else {
|
} else {
|
||||||
parent = mem_cgroup_from_cont(cont->parent);
|
parent = mem_cgroup_from_cont(cont->parent);
|
||||||
mem->use_hierarchy = parent->use_hierarchy;
|
mem->use_hierarchy = parent->use_hierarchy;
|
||||||
|
|
Loading…
Reference in a new issue