memcg: add lock to synchronize page accounting and migration
Introduce a new bit spin lock, PCG_MOVE_LOCK, to synchronize the page accounting and migration code. This reworks the locking scheme of _update_stat() and _move_account() by adding a new lock bit, PCG_MOVE_LOCK, which is always taken with IRQs disabled. 1. If pages are being migrated from a memcg, then updates to that memcg's page statistics are protected by grabbing PCG_MOVE_LOCK using move_lock_page_cgroup(). In an upcoming commit, memcg dirty page accounting will be updating memcg page accounting (specifically: the number of writeback pages) from IRQ context (softirq). Avoid a deadlocking nested spin lock attempt by disabling IRQs on the local processor when grabbing PCG_MOVE_LOCK. 2. The lock for update_page_stat is used only to avoid a race with move_account(). So, IRQ awareness of lock_page_cgroup() itself is not a problem. The problem is between mem_cgroup_update_page_stat() and mem_cgroup_move_account(). Trade-off: * Changing lock_page_cgroup() to always disable IRQs (or local_bh) has some impact on performance, and I think it's bad to disable IRQs when it's not necessary. * Adding a new lock makes move_account() slower. The score is below. Performance impact: moving an 8G anon process. Before: real 0m0.792s user 0m0.000s sys 0m0.780s After: real 0m0.854s user 0m0.000s sys 0m0.842s This score is bad, but planned optimization patches can reduce this impact. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Greg Thelen <gthelen@google.com> Reviewed-by: Minchan Kim <minchan.kim@gmail.com> Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: Andrea Righi <arighi@develer.com> Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
2a7106f2cb
commit
dbd4ea78f0
2 changed files with 35 additions and 5 deletions
|
@ -35,15 +35,18 @@ struct page_cgroup *lookup_page_cgroup(struct page *page);
|
|||
|
||||
/* Flag bits for struct page_cgroup; PCG_LOCK and PCG_MOVE_LOCK are used as bit spin locks on pc->flags. */
enum {
|
||||
/* flags for mem_cgroup */
|
||||
PCG_LOCK, /* page cgroup is locked */
|
||||
PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */
|
||||
PCG_CACHE, /* charged as cache */
|
||||
PCG_USED, /* this object is in use. */
|
||||
PCG_ACCT_LRU, /* page has been accounted for */
|
||||
PCG_MIGRATION, /* under page migration */
|
||||
/* flags for mem_cgroup and file and I/O status */
|
||||
PCG_MOVE_LOCK, /* Guards the following bits against races with move_account */
|
||||
PCG_FILE_MAPPED, /* page is accounted as "mapped" */
|
||||
PCG_FILE_DIRTY, /* page is dirty */
|
||||
PCG_FILE_WRITEBACK, /* page is under writeback */
|
||||
PCG_FILE_UNSTABLE_NFS, /* page is NFS unstable */
|
||||
PCG_MIGRATION, /* under page migration */
|
||||
/* No lock in page_cgroup */
|
||||
PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
|
||||
};
|
||||
|
||||
#define TESTPCGFLAG(uname, lname) \
|
||||
|
@ -117,6 +120,10 @@ static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
|
|||
|
||||
/* Take the PCG_LOCK bit spin lock in pc->flags. IRQs are not disabled here. */
static inline void lock_page_cgroup(struct page_cgroup *pc)
|
||||
{
|
||||
/*
|
||||
* Don't take this lock in IRQ context.
|
||||
* This lock protects pc->mem_cgroup and the USED, CACHE and MIGRATION flags.
|
||||
*/
|
||||
bit_spin_lock(PCG_LOCK, &pc->flags);
|
||||
}
|
||||
|
||||
|
@ -130,6 +137,24 @@ static inline int page_is_cgroup_locked(struct page_cgroup *pc)
|
|||
return bit_spin_is_locked(PCG_LOCK, &pc->flags);
|
||||
}
|
||||
|
||||
/*
 * Save the local IRQ state into *flags and disable IRQs, then take the
 * PCG_MOVE_LOCK bit spin lock in pc->flags. Release with
 * move_unlock_page_cgroup(), passing the same flags pointer.
 */
static inline void move_lock_page_cgroup(struct page_cgroup *pc,
|
||||
unsigned long *flags)
|
||||
{
|
||||
/*
|
||||
* Page-cache stat bits in pc->flags are updated from both normal and IRQ
|
||||
* context, so disable IRQs before taking the lock to avoid deadlock.
|
||||
*/
|
||||
local_irq_save(*flags);
|
||||
bit_spin_lock(PCG_MOVE_LOCK, &pc->flags);
|
||||
}
|
||||
|
||||
/*
 * Release the PCG_MOVE_LOCK bit spin lock in pc->flags, then restore the
 * IRQ state previously saved in *flags (the reverse of the order used by
 * move_lock_page_cgroup()).
 */
static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
|
||||
unsigned long *flags)
|
||||
{
|
||||
/* Drop the lock before re-enabling IRQs so it is never held with IRQs on. */
bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags);
|
||||
local_irq_restore(*flags);
|
||||
}
|
||||
|
||||
#else /* CONFIG_CGROUP_MEM_RES_CTLR */
|
||||
struct page_cgroup;
|
||||
|
||||
|
|
|
@ -1606,6 +1606,7 @@ void mem_cgroup_update_page_stat(struct page *page,
|
|||
struct mem_cgroup *mem;
|
||||
struct page_cgroup *pc = lookup_page_cgroup(page);
|
||||
bool need_unlock = false;
|
||||
unsigned long uninitialized_var(flags);
|
||||
|
||||
if (unlikely(!pc))
|
||||
return;
|
||||
|
@ -1617,7 +1618,7 @@ void mem_cgroup_update_page_stat(struct page *page,
|
|||
/* pc->mem_cgroup is unstable ? */
|
||||
if (unlikely(mem_cgroup_stealed(mem))) {
|
||||
/* take a lock against to access pc->mem_cgroup */
|
||||
lock_page_cgroup(pc);
|
||||
move_lock_page_cgroup(pc, &flags);
|
||||
need_unlock = true;
|
||||
mem = pc->mem_cgroup;
|
||||
if (!mem || !PageCgroupUsed(pc))
|
||||
|
@ -1640,7 +1641,7 @@ void mem_cgroup_update_page_stat(struct page *page,
|
|||
|
||||
out:
|
||||
if (unlikely(need_unlock))
|
||||
unlock_page_cgroup(pc);
|
||||
move_unlock_page_cgroup(pc, &flags);
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
|
@ -2211,9 +2212,13 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
|
|||
struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
unsigned long flags;
|
||||
|
||||
lock_page_cgroup(pc);
|
||||
if (PageCgroupUsed(pc) && pc->mem_cgroup == from) {
|
||||
move_lock_page_cgroup(pc, &flags);
|
||||
__mem_cgroup_move_account(pc, from, to, uncharge);
|
||||
move_unlock_page_cgroup(pc, &flags);
|
||||
ret = 0;
|
||||
}
|
||||
unlock_page_cgroup(pc);
|
||||
|
|
Loading…
Reference in a new issue