memcg: cpu hotplug aware percpu count updates
Currently, memcg's per-cpu counters are summed with for_each_possible_cpu()
to get the value. It's better to use for_each_online_cpu() together with a
cpu hotplug handler that folds a dead cpu's counts into a shared base. This
patch only handles the statistics counters; MEM_CGROUP_ON_MOVE will be
handled in another patch.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
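In outline: readers sum only the online cpus and then add whatever was
drained out of offlined cpus, while the hotplug callback folds a dead cpu's
counters into mem->nocpu_base under pcp_counter_lock. Below is a minimal
userspace sketch of that bookkeeping; it is not part of the patch, and the
names (NCPU, count[], online[], nocpu_base, the pthread mutex) are
illustrative stand-ins for the kernel's per-cpu counters, cpu online mask,
get/put_online_cpus(), and spinlock.

/*
 * Sketch only: drain-on-offline keeps the total stable while reads
 * stay cheap (online cpus + one locked base). Stand-in names, not
 * kernel APIs. Build: cc -std=c99 sketch.c -lpthread
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NCPU 4                    /* stand-in for possible cpus */

static long count[NCPU];          /* stand-in for per-cpu counters */
static bool online[NCPU] = { true, true, true, true };
static long nocpu_base;           /* counts drained from dead cpus */
static pthread_mutex_t pcp_counter_lock = PTHREAD_MUTEX_INITIALIZER;

/* read path: sum online cpus, then add the drained base */
static long read_stat(void)
{
	long val = 0;

	/* the kernel brackets this with get/put_online_cpus() */
	for (int cpu = 0; cpu < NCPU; cpu++)
		if (online[cpu])
			val += count[cpu];
	pthread_mutex_lock(&pcp_counter_lock);
	val += nocpu_base;
	pthread_mutex_unlock(&pcp_counter_lock);
	return val;
}

/* hotplug path: fold a dead cpu's counts into the shared base */
static void drain_pcp_counter(int cpu)
{
	pthread_mutex_lock(&pcp_counter_lock);
	nocpu_base += count[cpu];
	count[cpu] = 0;
	pthread_mutex_unlock(&pcp_counter_lock);
	online[cpu] = false;
}

int main(void)
{
	count[0] = 10; count[1] = 20; count[2] = 30; count[3] = 40;
	printf("before offline: %ld\n", read_stat());	/* 100 */
	drain_pcp_counter(3);				/* cpu 3 dies */
	printf("after  offline: %ld\n", read_stat());	/* still 100 */
	return 0;
}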
parent 7d74b06f24
commit 711d3d2c9b
1 changed file with 93 additions and 9 deletions
mm/memcontrol.c | 102
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -89,7 +89,9 @@ enum mem_cgroup_stat_index {
 	MEM_CGROUP_STAT_PGPGIN_COUNT,	/* # of pages paged in */
 	MEM_CGROUP_STAT_PGPGOUT_COUNT,	/* # of pages paged out */
 	MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
-	MEM_CGROUP_EVENTS,	/* incremented at every pagein/pageout */
+	MEM_CGROUP_STAT_DATA, /* end of data requires synchronization */
+	/* incremented at every pagein/pageout */
+	MEM_CGROUP_EVENTS = MEM_CGROUP_STAT_DATA,
+	MEM_CGROUP_ON_MOVE,	/* someone is moving account between groups */
 
 	MEM_CGROUP_STAT_NSTATS,
@@ -255,6 +257,12 @@ struct mem_cgroup {
 	 * percpu counter.
 	 */
 	struct mem_cgroup_stat_cpu *stat;
+	/*
+	 * used when a cpu is offlined or other synchronizations.
+	 * See mem_cgroup_read_stat().
+	 */
+	struct mem_cgroup_stat_cpu nocpu_base;
+	spinlock_t pcp_counter_lock;
 
 	/* Stuffs for move charges at task migration. */
@@ -531,14 +539,40 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
 	return mz;
 }
 
+/*
+ * Implementation Note: reading percpu statistics for memcg.
+ *
+ * Both vmstat[] and percpu_counter have thresholds and do periodic
+ * synchronization to implement a "quick" read. There is a trade-off between
+ * reading cost and precision of value. Then, we may have a chance to implement
+ * a periodic synchronization of counters in memcg's counter.
+ *
+ * But this _read() function is used for the user interface now. The user
+ * accounts memory usage by memory cgroup and _always_ requires an exact value
+ * because he accounts memory. Even if we provide a quick-and-fuzzy read, we
+ * always have to visit all online cpus and make the sum. So, for now,
+ * unnecessary synchronization is not implemented (just for cpu hotplug).
+ *
+ * If there are kernel-internal users which can make use of a not-exact
+ * value, and reading all cpu values becomes a performance bottleneck in some
+ * common workload, a threshold and synchronization as in vmstat[] should be
+ * implemented.
+ */
 static s64 mem_cgroup_read_stat(struct mem_cgroup *mem,
 		enum mem_cgroup_stat_index idx)
 {
 	int cpu;
 	s64 val = 0;
 
-	for_each_possible_cpu(cpu)
+	get_online_cpus();
+	for_each_online_cpu(cpu)
 		val += per_cpu(mem->stat->count[idx], cpu);
+#ifdef CONFIG_HOTPLUG_CPU
+	spin_lock(&mem->pcp_counter_lock);
+	val += mem->nocpu_base.count[idx];
+	spin_unlock(&mem->pcp_counter_lock);
+#endif
+	put_online_cpus();
 	return val;
 }
@@ -663,9 +697,28 @@ static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 /* The caller has to guarantee "mem" exists before calling this */
 static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem)
 {
-	if (mem && css_tryget(&mem->css))
-		return mem;
-	return NULL;
+	struct cgroup_subsys_state *css;
+	int found;
+
+	if (!mem) /* ROOT cgroup has the smallest ID */
+		return root_mem_cgroup; /* css_put/get against root is ignored */
+	if (!mem->use_hierarchy) {
+		if (css_tryget(&mem->css))
+			return mem;
+		return NULL;
+	}
+	rcu_read_lock();
+	/*
+	 * searching a memory cgroup which has the smallest ID under given
+	 * ROOT cgroup. (ID >= 1)
+	 */
+	css = css_get_next(&mem_cgroup_subsys, 1, &mem->css, &found);
+	if (css && css_tryget(css))
+		mem = container_of(css, struct mem_cgroup, css);
+	else
+		mem = NULL;
+	rcu_read_unlock();
+	return mem;
 }
 
 static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
@@ -680,9 +733,13 @@ static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
 	hierarchy_used = iter->use_hierarchy;
 
 	css_put(&iter->css);
-	if (!cond || !hierarchy_used)
+	/* If no ROOT, walk all, ignore hierarchy */
+	if (!cond || (root && !hierarchy_used))
 		return NULL;
 
+	if (!root)
+		root = root_mem_cgroup;
+
 	do {
 		iter = NULL;
 		rcu_read_lock();
@@ -711,6 +768,9 @@ static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
 #define for_each_mem_cgroup_tree(iter, root) \
 	for_each_mem_cgroup_tree_cond(iter, root, true)
 
+#define for_each_mem_cgroup_all(iter) \
+	for_each_mem_cgroup_tree_cond(iter, NULL, true)
+
 static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
 {
@@ -1676,15 +1736,38 @@ static void drain_all_stock_sync(void)
 	atomic_dec(&memcg_drain_count);
 }
 
-static int __cpuinit memcg_stock_cpu_callback(struct notifier_block *nb,
+/*
+ * This function drains percpu counter values from the DEAD cpu and
+ * moves them to the local cpu. Note that this function can be preempted.
+ */
+static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *mem, int cpu)
+{
+	int i;
+
+	spin_lock(&mem->pcp_counter_lock);
+	for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) {
+		s64 x = per_cpu(mem->stat->count[i], cpu);
+
+		per_cpu(mem->stat->count[i], cpu) = 0;
+		mem->nocpu_base.count[i] += x;
+	}
+	spin_unlock(&mem->pcp_counter_lock);
+}
+
+static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb,
 					unsigned long action,
 					void *hcpu)
 {
 	int cpu = (unsigned long)hcpu;
 	struct memcg_stock_pcp *stock;
+	struct mem_cgroup *iter;
 
-	if (action != CPU_DEAD)
+	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
 		return NOTIFY_OK;
+
+	for_each_mem_cgroup_all(iter)
+		mem_cgroup_drain_pcp_counter(iter, cpu);
+
 	stock = &per_cpu(memcg_stock, cpu);
 	drain_stock(stock);
 	return NOTIFY_OK;
@@ -4098,6 +4181,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 			vfree(mem);
 		mem = NULL;
 	}
+	spin_lock_init(&mem->pcp_counter_lock);
 	return mem;
 }
@@ -4224,7 +4308,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 						&per_cpu(memcg_stock, cpu);
 			INIT_WORK(&stock->work, drain_local_stock);
 		}
-		hotcpu_notifier(memcg_stock_cpu_callback, 0);
+		hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
 	} else {
 		parent = mem_cgroup_from_cont(cont->parent);
 		mem->use_hierarchy = parent->use_hierarchy;