dbd4ea78f0
Introduce a new bit spin lock, PCG_MOVE_LOCK, to synchronize the page accounting and migration code. This reworks the locking scheme of _update_stat() and _move_account() by adding new lock bit PCG_MOVE_LOCK, which is always taken under IRQ disable. 1. If pages are being migrated from a memcg, then updates to that memcg page statistics are protected by grabbing PCG_MOVE_LOCK using move_lock_page_cgroup(). In an upcoming commit, memcg dirty page accounting will be updating memcg page accounting (specifically: num writeback pages) from IRQ context (softirq). Avoid a deadlocking nested spin lock attempt by disabling irq on the local processor when grabbing the PCG_MOVE_LOCK. 2. lock for update_page_stat is used only for avoiding race with move_account(). So, IRQ awareness of lock_page_cgroup() itself is not a problem. The problem is between mem_cgroup_update_page_stat() and mem_cgroup_move_account_page(). Trade-off: * Changing lock_page_cgroup() to always disable IRQ (or local_bh) has some impacts on performance and I think it's bad to disable IRQ when it's not necessary. * adding a new lock makes move_account() slower. Score is here. Performance Impact: moving a 8G anon process. Before: real 0m0.792s user 0m0.000s sys 0m0.780s After: real 0m0.854s user 0m0.000s sys 0m0.842s This score is bad but planned patches for optimization can reduce this impact. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Greg Thelen <gthelen@google.com> Reviewed-by: Minchan Kim <minchan.kim@gmail.com> Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: Andrea Righi <arighi@develer.com> Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
215 lines
5.6 KiB
C
215 lines
5.6 KiB
C
#ifndef __LINUX_PAGE_CGROUP_H
|
|
#define __LINUX_PAGE_CGROUP_H
|
|
|
|
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
|
|
#include <linux/bit_spinlock.h>
|
|
/*
 * Page Cgroup can be considered as an extended mem_map.
 * A page_cgroup page is associated with every page descriptor. The
 * page_cgroup helps us identify information about the cgroup.
 * All page cgroups are allocated at boot or on a memory hotplug event,
 * so the page cgroup for a pfn always exists.
 */
|
|
struct page_cgroup {
|
|
unsigned long flags;
|
|
struct mem_cgroup *mem_cgroup;
|
|
struct page *page;
|
|
struct list_head lru; /* per cgroup LRU list */
|
|
};
|
|
|
|
void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
|
|
|
|
#ifdef CONFIG_SPARSEMEM
|
|
static inline void __init page_cgroup_init_flatmem(void)
|
|
{
|
|
}
|
|
extern void __init page_cgroup_init(void);
|
|
#else
|
|
void __init page_cgroup_init_flatmem(void);
|
|
static inline void __init page_cgroup_init(void)
|
|
{
|
|
}
|
|
#endif
|
|
|
|
struct page_cgroup *lookup_page_cgroup(struct page *page);
|
|
|
|
/*
 * Bit numbers within page_cgroup->flags.  The order defines the bit
 * positions, so new flags must not be inserted between existing ones
 * without auditing every user.
 */
enum {
	/* flags for mem_cgroup */
	PCG_LOCK,		/* Lock for pc->mem_cgroup and following bits. */
	PCG_CACHE,		/* charged as cache */
	PCG_USED,		/* this object is in use. */
	PCG_MIGRATION,		/* under page migration */
	/* flags for mem_cgroup and file and I/O status */
	PCG_MOVE_LOCK,		/* For race between move_account vs. following bits */
	PCG_FILE_MAPPED,	/* page is accounted as "mapped" */
	PCG_FILE_DIRTY,		/* page is dirty */
	PCG_FILE_WRITEBACK,	/* page is under writeback */
	PCG_FILE_UNSTABLE_NFS,	/* page is NFS unstable */
	/* No lock in page_cgroup */
	PCG_ACCT_LRU,		/* page has been accounted for (under lru_lock) */
};
|
|
|
|
/*
 * Generators for the page_cgroup flag accessors.  Each macro expands to a
 * single static inline helper that applies one bit operation to bit
 * PCG_<lname> of pc->flags:
 *
 *   TESTPCGFLAG      -> PageCgroup<uname>()          test_bit()
 *   SETPCGFLAG       -> SetPageCgroup<uname>()       set_bit()
 *   CLEARPCGFLAG     -> ClearPageCgroup<uname>()     clear_bit()
 *   TESTCLEARPCGFLAG -> TestClearPageCgroup<uname>() test_and_clear_bit()
 *   TESTSETPCGFLAG   -> TestSetPageCgroup<uname>()   test_and_set_bit()
 */
#define TESTPCGFLAG(uname, lname)					\
static inline int PageCgroup##uname(struct page_cgroup *pc)		\
	{ return test_bit(PCG_##lname, &pc->flags); }

#define SETPCGFLAG(uname, lname)					\
static inline void SetPageCgroup##uname(struct page_cgroup *pc)		\
	{ set_bit(PCG_##lname, &pc->flags); }

#define CLEARPCGFLAG(uname, lname)					\
static inline void ClearPageCgroup##uname(struct page_cgroup *pc)	\
	{ clear_bit(PCG_##lname, &pc->flags); }

#define TESTCLEARPCGFLAG(uname, lname)					\
static inline int TestClearPageCgroup##uname(struct page_cgroup *pc)	\
	{ return test_and_clear_bit(PCG_##lname, &pc->flags); }

#define TESTSETPCGFLAG(uname, lname)					\
static inline int TestSetPageCgroup##uname(struct page_cgroup *pc)	\
	{ return test_and_set_bit(PCG_##lname, &pc->flags); }
|
|
|
|
/* Cache flag is set only once (at allocation) */
|
|
TESTPCGFLAG(Cache, CACHE)
|
|
CLEARPCGFLAG(Cache, CACHE)
|
|
SETPCGFLAG(Cache, CACHE)
|
|
|
|
TESTPCGFLAG(Used, USED)
|
|
CLEARPCGFLAG(Used, USED)
|
|
SETPCGFLAG(Used, USED)
|
|
|
|
SETPCGFLAG(AcctLRU, ACCT_LRU)
|
|
CLEARPCGFLAG(AcctLRU, ACCT_LRU)
|
|
TESTPCGFLAG(AcctLRU, ACCT_LRU)
|
|
TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU)
|
|
|
|
|
|
SETPCGFLAG(FileMapped, FILE_MAPPED)
|
|
CLEARPCGFLAG(FileMapped, FILE_MAPPED)
|
|
TESTPCGFLAG(FileMapped, FILE_MAPPED)
|
|
|
|
SETPCGFLAG(FileDirty, FILE_DIRTY)
|
|
CLEARPCGFLAG(FileDirty, FILE_DIRTY)
|
|
TESTPCGFLAG(FileDirty, FILE_DIRTY)
|
|
TESTCLEARPCGFLAG(FileDirty, FILE_DIRTY)
|
|
TESTSETPCGFLAG(FileDirty, FILE_DIRTY)
|
|
|
|
SETPCGFLAG(FileWriteback, FILE_WRITEBACK)
|
|
CLEARPCGFLAG(FileWriteback, FILE_WRITEBACK)
|
|
TESTPCGFLAG(FileWriteback, FILE_WRITEBACK)
|
|
|
|
SETPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
|
|
CLEARPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
|
|
TESTPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
|
|
TESTCLEARPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
|
|
TESTSETPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
|
|
|
|
SETPCGFLAG(Migration, MIGRATION)
|
|
CLEARPCGFLAG(Migration, MIGRATION)
|
|
TESTPCGFLAG(Migration, MIGRATION)
|
|
|
|
static inline int page_cgroup_nid(struct page_cgroup *pc)
|
|
{
|
|
return page_to_nid(pc->page);
|
|
}
|
|
|
|
static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
|
|
{
|
|
return page_zonenum(pc->page);
|
|
}
|
|
|
|
static inline void lock_page_cgroup(struct page_cgroup *pc)
|
|
{
|
|
/*
|
|
* Don't take this lock in IRQ context.
|
|
* This lock is for pc->mem_cgroup, USED, CACHE, MIGRATION
|
|
*/
|
|
bit_spin_lock(PCG_LOCK, &pc->flags);
|
|
}
|
|
|
|
static inline void unlock_page_cgroup(struct page_cgroup *pc)
|
|
{
|
|
bit_spin_unlock(PCG_LOCK, &pc->flags);
|
|
}
|
|
|
|
static inline int page_is_cgroup_locked(struct page_cgroup *pc)
|
|
{
|
|
return bit_spin_is_locked(PCG_LOCK, &pc->flags);
|
|
}
|
|
|
|
static inline void move_lock_page_cgroup(struct page_cgroup *pc,
|
|
unsigned long *flags)
|
|
{
|
|
/*
|
|
* We know updates to pc->flags of page cache's stats are from both of
|
|
* usual context or IRQ context. Disable IRQ to avoid deadlock.
|
|
*/
|
|
local_irq_save(*flags);
|
|
bit_spin_lock(PCG_MOVE_LOCK, &pc->flags);
|
|
}
|
|
|
|
static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
|
|
unsigned long *flags)
|
|
{
|
|
bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags);
|
|
local_irq_restore(*flags);
|
|
}
|
|
|
|
#else /* CONFIG_CGROUP_MEM_RES_CTLR */
|
|
struct page_cgroup;
|
|
|
|
static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
|
|
{
|
|
}
|
|
|
|
/*
 * Stub used when CONFIG_CGROUP_MEM_RES_CTLR is not set: there is never a
 * page_cgroup to find, so always return NULL.
 */
static inline struct page_cgroup *lookup_page_cgroup(struct page *page)
{
	return NULL;
}
|
|
|
|
/* No-op stub used when CONFIG_CGROUP_MEM_RES_CTLR is not set. */
static inline void page_cgroup_init(void)
{
}
|
|
|
|
/* No-op stub used when CONFIG_CGROUP_MEM_RES_CTLR is not set. */
static inline void __init page_cgroup_init_flatmem(void)
{
}
|
|
|
|
#endif
|
|
|
|
#include <linux/swap.h>
|
|
|
|
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
|
|
/*
 * Swap cgroup interface; the definitions live outside this header.
 * Note that swap_cgroup_cmpxchg() has no inline stub in the #else branch
 * below, so its callers must themselves depend on
 * CONFIG_CGROUP_MEM_RES_CTLR_SWAP.
 */
extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
					unsigned short old, unsigned short new);
extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
extern unsigned short lookup_swap_cgroup(swp_entry_t ent);
extern int swap_cgroup_swapon(int type, unsigned long max_pages);
extern void swap_cgroup_swapoff(int type);
|
|
#else
|
|
|
|
static inline
|
|
unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline
|
|
unsigned short lookup_swap_cgroup(swp_entry_t ent)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Stub used when CONFIG_CGROUP_MEM_RES_CTLR_SWAP is not set: nothing to
 * set up, so always return 0.
 */
static inline int
swap_cgroup_swapon(int type, unsigned long max_pages)
{
	return 0;
}
|
|
|
|
/* No-op stub used when CONFIG_CGROUP_MEM_RES_CTLR_SWAP is not set. */
static inline void swap_cgroup_swapoff(int type)
{
}
|
|
|
|
#endif
|
|
#endif
|