kernel-fxtec-pro1x/include/linux/page_cgroup.h
KAMEZAWA Hiroyuki dbd4ea78f0 memcg: add lock to synchronize page accounting and migration
Introduce a new bit spin lock, PCG_MOVE_LOCK, to synchronize the page
accounting and migration code.  This reworks the locking scheme of
_update_stat() and _move_account() by adding a new lock bit, PCG_MOVE_LOCK,
which is always taken with IRQs disabled.

1. If pages are being migrated from a memcg, then updates to that
   memcg page statistics are protected by grabbing PCG_MOVE_LOCK using
   move_lock_page_cgroup().  In an upcoming commit, memcg dirty page
   accounting will be updating memcg page accounting (specifically: num
   writeback pages) from IRQ context (softirq).  Avoid a deadlocking
   nested spin lock attempt by disabling irq on the local processor when
   grabbing the PCG_MOVE_LOCK.

2. The lock taken for update_page_stat() is used only to avoid a race
   with move_account().  So the IRQ awareness of lock_page_cgroup() itself
   is not a problem.  The problem is between mem_cgroup_update_page_stat()
   and mem_cgroup_move_account_page().

Trade-off:
  * Changing lock_page_cgroup() to always disable IRQ (or
    local_bh) has some impacts on performance and I think
    it's bad to disable IRQ when it's not necessary.
  * Adding a new lock makes move_account() slower.  The measured
    cost is shown below.

Performance Impact: moving an 8G anon process.

Before:
	real    0m0.792s
	user    0m0.000s
	sys     0m0.780s

After:
	real    0m0.854s
	user    0m0.000s
	sys     0m0.842s

This score is bad but planned patches for optimization can reduce
this impact.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Greg Thelen <gthelen@google.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Andrea Righi <arighi@develer.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-01-13 17:32:50 -08:00

215 lines
5.6 KiB
C

#ifndef __LINUX_PAGE_CGROUP_H
#define __LINUX_PAGE_CGROUP_H
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
#include <linux/bit_spinlock.h>
/*
 * struct page_cgroup - cgroup bookkeeping attached to a page descriptor.
 *
 * Page cgroup can be thought of as an extension of mem_map: a page_cgroup
 * is associated with every page descriptor and helps identify information
 * about the cgroup.
 *
 * All page_cgroups are allocated at boot or on a memory hotplug event, so
 * the page_cgroup for a given pfn always exists.
 */
struct page_cgroup {
	/* PCG_* bit flags; also hosts the PCG_LOCK / PCG_MOVE_LOCK bit locks */
	unsigned long flags;

	/* memory cgroup pointer; guarded by PCG_LOCK */
	struct mem_cgroup *mem_cgroup;

	/* page descriptor this entry belongs to */
	struct page *page;

	/* per cgroup LRU list */
	struct list_head lru;
};
/*
 * Initialisation hook that takes a node's pglist_data.  The definition is
 * not in this header; when CONFIG_CGROUP_MEM_RES_CTLR is disabled an empty
 * inline stub is provided instead.
 * NOTE(review): presumably prepares per-node page_cgroup state -- confirm
 * against the implementation.
 */
void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
/*
 * Two init entry points are declared, page_cgroup_init() and
 * page_cgroup_init_flatmem().  Depending on CONFIG_SPARSEMEM exactly one
 * of them is a real out-of-line function; the other is an empty inline,
 * so both names are always available to callers.
 */
#ifdef CONFIG_SPARSEMEM
/* SPARSEMEM: the flatmem variant has nothing to do. */
static inline void __init page_cgroup_init_flatmem(void) { }

extern void __init page_cgroup_init(void);
#else /* !CONFIG_SPARSEMEM */
void __init page_cgroup_init_flatmem(void);

/* Non-SPARSEMEM: page_cgroup_init() has nothing to do. */
static inline void __init page_cgroup_init(void) { }
#endif /* CONFIG_SPARSEMEM */
/*
 * Look up the page_cgroup that goes with @page.  The definition is not in
 * this header.  When CONFIG_CGROUP_MEM_RES_CTLR is disabled the inline
 * stub further down returns NULL instead.
 */
struct page_cgroup *lookup_page_cgroup(struct page *page);
/*
 * Bit numbers inside page_cgroup->flags.
 *
 * These are bit indices (as handed to test_bit(), set_bit(),
 * bit_spin_lock(), ...), not masks, so the order of the enumerators
 * determines the bit layout.
 */
enum {
	/*
	 * flags for mem_cgroup
	 */

	/* Lock for pc->mem_cgroup and following bits. */
	PCG_LOCK,
	/* charged as cache */
	PCG_CACHE,
	/* this object is in use. */
	PCG_USED,
	/* under page migration */
	PCG_MIGRATION,

	/*
	 * flags for mem_cgroup and file and I/O status
	 */

	/* For race between move_account v.s. following bits */
	PCG_MOVE_LOCK,
	/* page is accounted as "mapped" */
	PCG_FILE_MAPPED,
	/* page is dirty */
	PCG_FILE_DIRTY,
	/* page is under writeback */
	PCG_FILE_WRITEBACK,
	/* page is NFS unstable */
	PCG_FILE_UNSTABLE_NFS,

	/*
	 * No lock in page_cgroup
	 */

	/* page has been accounted for (under lru_lock) */
	PCG_ACCT_LRU,
};
/*
 * Generators for the page_cgroup flag accessors.
 *
 * @uname is the suffix used in the generated function name and @lname the
 * suffix of the PCG_* enumerator.  Each generated helper is a static
 * inline that applies one bit operation to pc->flags.
 */

/* PageCgroup<uname>(pc): returns test_bit() for the flag. */
#define TESTPCGFLAG(uname, lname)					\
static inline int PageCgroup##uname(struct page_cgroup *pc)		\
{									\
	return test_bit(PCG_##lname, &pc->flags);			\
}

/* SetPageCgroup<uname>(pc): set_bit() on the flag. */
#define SETPCGFLAG(uname, lname)					\
static inline void SetPageCgroup##uname(struct page_cgroup *pc)		\
{									\
	set_bit(PCG_##lname, &pc->flags);				\
}

/* ClearPageCgroup<uname>(pc): clear_bit() on the flag. */
#define CLEARPCGFLAG(uname, lname)					\
static inline void ClearPageCgroup##uname(struct page_cgroup *pc)	\
{									\
	clear_bit(PCG_##lname, &pc->flags);				\
}

/* TestClearPageCgroup<uname>(pc): returns test_and_clear_bit() for the flag. */
#define TESTCLEARPCGFLAG(uname, lname)					\
static inline int TestClearPageCgroup##uname(struct page_cgroup *pc)	\
{									\
	return test_and_clear_bit(PCG_##lname, &pc->flags);		\
}

/* TestSetPageCgroup<uname>(pc): returns test_and_set_bit() for the flag. */
#define TESTSETPCGFLAG(uname, lname)					\
static inline int TestSetPageCgroup##uname(struct page_cgroup *pc)	\
{									\
	return test_and_set_bit(PCG_##lname, &pc->flags);		\
}
/* Cache flag is set only once (at allocation) */
TESTPCGFLAG(Cache, CACHE)
CLEARPCGFLAG(Cache, CACHE)
SETPCGFLAG(Cache, CACHE)
TESTPCGFLAG(Used, USED)
CLEARPCGFLAG(Used, USED)
SETPCGFLAG(Used, USED)
SETPCGFLAG(AcctLRU, ACCT_LRU)
CLEARPCGFLAG(AcctLRU, ACCT_LRU)
TESTPCGFLAG(AcctLRU, ACCT_LRU)
TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU)
SETPCGFLAG(FileMapped, FILE_MAPPED)
CLEARPCGFLAG(FileMapped, FILE_MAPPED)
TESTPCGFLAG(FileMapped, FILE_MAPPED)
SETPCGFLAG(FileDirty, FILE_DIRTY)
CLEARPCGFLAG(FileDirty, FILE_DIRTY)
TESTPCGFLAG(FileDirty, FILE_DIRTY)
TESTCLEARPCGFLAG(FileDirty, FILE_DIRTY)
TESTSETPCGFLAG(FileDirty, FILE_DIRTY)
SETPCGFLAG(FileWriteback, FILE_WRITEBACK)
CLEARPCGFLAG(FileWriteback, FILE_WRITEBACK)
TESTPCGFLAG(FileWriteback, FILE_WRITEBACK)
SETPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
CLEARPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
TESTPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
TESTCLEARPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
TESTSETPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
SETPCGFLAG(Migration, MIGRATION)
CLEARPCGFLAG(Migration, MIGRATION)
TESTPCGFLAG(Migration, MIGRATION)
/*
 * page_cgroup_nid - node id of the page behind @pc.
 *
 * Returns whatever page_to_nid() reports for pc->page.
 */
static inline int page_cgroup_nid(struct page_cgroup *pc)
{
	struct page *page = pc->page;

	return page_to_nid(page);
}
/*
 * page_cgroup_zid - zone index of the page behind @pc.
 *
 * Returns whatever page_zonenum() reports for pc->page.
 */
static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
{
	struct page *page = pc->page;

	return page_zonenum(page);
}
/*
 * lock_page_cgroup - take the PCG_LOCK bit spinlock in pc->flags.
 *
 * This lock is for pc->mem_cgroup and the USED, CACHE and MIGRATION flags.
 * Interrupts are left as they are, so don't take this lock in IRQ context.
 */
static inline void lock_page_cgroup(struct page_cgroup *pc)
{
	unsigned long *word = &pc->flags;

	bit_spin_lock(PCG_LOCK, word);
}
/*
 * unlock_page_cgroup - release the PCG_LOCK bit spinlock taken by
 * lock_page_cgroup().
 */
static inline void unlock_page_cgroup(struct page_cgroup *pc)
{
	unsigned long *word = &pc->flags;

	bit_spin_unlock(PCG_LOCK, word);
}
/*
 * page_is_cgroup_locked - report whether PCG_LOCK is currently held.
 *
 * Returns the result of bit_spin_is_locked() for the PCG_LOCK bit of
 * pc->flags.
 */
static inline int page_is_cgroup_locked(struct page_cgroup *pc)
{
	unsigned long *word = &pc->flags;

	return bit_spin_is_locked(PCG_LOCK, word);
}
/*
 * move_lock_page_cgroup - take the PCG_MOVE_LOCK bit spinlock with local
 * interrupts disabled.
 * @pc:    page_cgroup whose flags word holds the lock bit
 * @flags: where the saved interrupt state is stored; hand the same pointer
 *         to move_unlock_page_cgroup()
 *
 * Updates to the page cache statistics bits in pc->flags can come from
 * both ordinary context and IRQ context, so interrupts are disabled before
 * spinning on the bit to avoid a deadlock.
 */
static inline void move_lock_page_cgroup(struct page_cgroup *pc,
					 unsigned long *flags)
{
	unsigned long *word = &pc->flags;

	local_irq_save(*flags);
	bit_spin_lock(PCG_MOVE_LOCK, word);
}
/*
 * move_unlock_page_cgroup - release PCG_MOVE_LOCK and restore interrupts.
 * @pc:    page_cgroup whose flags word holds the lock bit
 * @flags: interrupt state saved by move_lock_page_cgroup()
 *
 * The bit lock is dropped first; the saved interrupt state is restored
 * afterwards, mirroring the acquisition order in reverse.
 */
static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
					   unsigned long *flags)
{
	unsigned long *word = &pc->flags;

	bit_spin_unlock(PCG_MOVE_LOCK, word);
	local_irq_restore(*flags);
}
#else /* CONFIG_CGROUP_MEM_RES_CTLR */
struct page_cgroup;
/* Memory controller compiled out: there is nothing to initialise. */
static inline void __meminit
pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
}
/* Memory controller compiled out: no page has a page_cgroup, return NULL. */
static inline struct page_cgroup *
lookup_page_cgroup(struct page *page)
{
	return NULL;
}
/* Memory controller compiled out: empty init stub. */
static inline void page_cgroup_init(void) { }
/* Memory controller compiled out: empty flatmem init stub. */
static inline void __init page_cgroup_init_flatmem(void) { }
#endif
#include <linux/swap.h>
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
/*
 * Swap cgroup interface, available with CONFIG_CGROUP_MEM_RES_CTLR_SWAP.
 * Only the prototypes are visible in this header; the per-function notes
 * below are inferred from the signatures and must be confirmed against
 * the implementation.
 */

/*
 * NOTE(review): presumably replaces the id stored for @ent with @new only
 * when it currently equals @old -- confirm.  Unlike the functions below,
 * this one has no inline stub in the #else branch of this header.
 */
extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
unsigned short old, unsigned short new);
/* NOTE(review): presumably stores @id for @ent -- confirm what is returned. */
extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
/* NOTE(review): presumably returns the id stored for @ent -- confirm. */
extern unsigned short lookup_swap_cgroup(swp_entry_t ent);
/* NOTE(review): presumably per-swap-type setup; the stub returns 0 -- confirm error values. */
extern int swap_cgroup_swapon(int type, unsigned long max_pages);
/* NOTE(review): presumably per-swap-type teardown -- confirm. */
extern void swap_cgroup_swapoff(int type);
#else
/* Swap accounting compiled out: nothing is stored, always returns 0. */
static inline unsigned short swap_cgroup_record(swp_entry_t ent,
						unsigned short id)
{
	return 0;
}
/* Swap accounting compiled out: every lookup returns 0. */
static inline unsigned short lookup_swap_cgroup(swp_entry_t ent)
{
	return 0;
}
/* Swap accounting compiled out: nothing to set up, always returns 0. */
static inline int swap_cgroup_swapon(int type, unsigned long max_pages)
{
	return 0;
}
/* Swap accounting compiled out: nothing to tear down. */
static inline void swap_cgroup_swapoff(int type)
{
}
#endif
#endif