mm: vmpressure: allow in-kernel clients to subscribe for events

Currently, vmpressure is tied to memcg and its events are
available only to userspace clients. This patch removes
the dependency on CONFIG_MEMCG and adds a mechanism for
in-kernel clients to subscribe to vmpressure events. Raw
vmpressure values are delivered instead of vmpressure
levels, giving clients the flexibility to act on custom
pressure levels that are not currently defined by the
vmpressure module.

Change-Id: I38010f166546e8d7f12f5f355b5dbfd6ba04d587
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
This commit is contained in:
Vinayak Menon 2015-03-04 16:38:28 +05:30
parent 9a64886bcd
commit 837fb793af
3 changed files with 121 additions and 33 deletions

View file

@ -29,11 +29,13 @@ struct vmpressure {
struct mem_cgroup;
#ifdef CONFIG_MEMCG
extern int vmpressure_notifier_register(struct notifier_block *nb);
extern int vmpressure_notifier_unregister(struct notifier_block *nb);
extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
unsigned long scanned, unsigned long reclaimed);
extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio);
#ifdef CONFIG_MEMCG
extern void vmpressure_init(struct vmpressure *vmpr);
extern void vmpressure_cleanup(struct vmpressure *vmpr);
extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg);
@ -44,9 +46,9 @@ extern int vmpressure_register_event(struct mem_cgroup *memcg,
extern void vmpressure_unregister_event(struct mem_cgroup *memcg,
struct eventfd_ctx *eventfd);
#else
static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
unsigned long scanned, unsigned long reclaimed) {}
static inline void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg,
int prio) {}
static inline struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg)
{
return NULL;
}
#endif /* CONFIG_MEMCG */
#endif /* __LINUX_VMPRESSURE_H */

View file

@ -39,7 +39,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
mm_init.o mmu_context.o percpu.o slab_common.o \
compaction.o vmacache.o \
interval_tree.o list_lru.o workingset.o \
debug.o $(mmu-y) showmem.o
debug.o $(mmu-y) showmem.o vmpressure.o
obj-y += init-mm.o
@ -76,7 +76,7 @@ obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
obj-$(CONFIG_MEMCG) += memcontrol.o
obj-$(CONFIG_MEMCG_SWAP) += swap_cgroup.o
obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o
obj-$(CONFIG_GUP_BENCHMARK) += gup_benchmark.o

View file

@ -22,6 +22,8 @@
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/printk.h>
#include <linux/notifier.h>
#include <linux/init.h>
#include <linux/vmpressure.h>
/*
@ -49,6 +51,24 @@ static const unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16;
static const unsigned int vmpressure_level_med = 60;
static const unsigned int vmpressure_level_critical = 95;
static struct vmpressure global_vmpressure;
static BLOCKING_NOTIFIER_HEAD(vmpressure_notifier);
/*
 * vmpressure_notifier_register() - add @nb to the vmpressure notifier chain
 * @nb: notifier block to add
 *
 * Returns whatever blocking_notifier_chain_register() returns.
 */
int vmpressure_notifier_register(struct notifier_block *nb)
{
	int ret;

	ret = blocking_notifier_chain_register(&vmpressure_notifier, nb);

	return ret;
}
/*
 * vmpressure_notifier_unregister() - remove @nb from the vmpressure
 * notifier chain
 * @nb: notifier block to remove
 *
 * Returns whatever blocking_notifier_chain_unregister() returns.
 */
int vmpressure_notifier_unregister(struct notifier_block *nb)
{
	int ret;

	ret = blocking_notifier_chain_unregister(&vmpressure_notifier, nb);

	return ret;
}
/*
 * Run the vmpressure notifier chain. The raw pressure value is passed as the
 * notifier "action" argument and the data pointer is always NULL. The result
 * of the chain call is not used.
 */
static void vmpressure_notify(unsigned long pressure)
{
	struct blocking_notifier_head *chain = &vmpressure_notifier;

	blocking_notifier_call_chain(chain, pressure, NULL);
}
/*
* When there are too few pages left to scan, vmpressure() may miss the
* critical pressure as number of pages will be less than "window size".
@ -75,6 +95,7 @@ static struct vmpressure *work_to_vmpressure(struct work_struct *work)
return container_of(work, struct vmpressure, work);
}
#ifdef CONFIG_MEMCG
static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr)
{
struct cgroup_subsys_state *css = vmpressure_to_css(vmpr);
@ -85,6 +106,12 @@ static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr)
return NULL;
return memcg_to_vmpressure(memcg);
}
#else
/*
 * Variant built when CONFIG_MEMCG is not set: @vmpr is ignored and no parent
 * is ever reported.
 */
static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr)
{
	struct vmpressure *parent = NULL;

	return parent;
}
#endif
enum vmpressure_levels {
VMPRESSURE_LOW = 0,
@ -121,7 +148,7 @@ static enum vmpressure_levels vmpressure_level(unsigned long pressure)
return VMPRESSURE_LOW;
}
static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
static unsigned long vmpressure_calc_pressure(unsigned long scanned,
unsigned long reclaimed)
{
unsigned long scale = scanned + reclaimed;
@ -148,7 +175,7 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
pr_debug("%s: %3lu (s: %lu r: %lu)\n", __func__, pressure,
scanned, reclaimed);
return vmpressure_level(pressure);
return pressure;
}
struct vmpressure_event {
@ -186,6 +213,7 @@ static void vmpressure_work_fn(struct work_struct *work)
struct vmpressure *vmpr = work_to_vmpressure(work);
unsigned long scanned;
unsigned long reclaimed;
unsigned long pressure;
enum vmpressure_levels level;
bool ancestor = false;
bool signalled = false;
@ -210,7 +238,8 @@ static void vmpressure_work_fn(struct work_struct *work)
vmpr->tree_reclaimed = 0;
spin_unlock(&vmpr->sr_lock);
level = vmpressure_calc_level(scanned, reclaimed);
pressure = vmpressure_calc_pressure(scanned, reclaimed);
level = vmpressure_level(pressure);
do {
if (vmpressure_event(vmpr, level, ancestor, signalled))
@ -219,28 +248,8 @@ static void vmpressure_work_fn(struct work_struct *work)
} while ((vmpr = vmpressure_parent(vmpr)));
}
/**
* vmpressure() - Account memory pressure through scanned/reclaimed ratio
* @gfp: reclaimer's gfp mask
* @memcg: cgroup memory controller handle
* @tree: legacy subtree mode
* @scanned: number of pages scanned
* @reclaimed: number of pages reclaimed
*
* This function should be called from the vmscan reclaim path to account
* "instantaneous" memory pressure (scanned/reclaimed ratio). The raw
* pressure index is then further refined and averaged over time.
*
* If @tree is set, vmpressure is in traditional userspace reporting
* mode: @memcg is considered the pressure root and userspace is
* notified of the entire subtree's reclaim efficiency.
*
* If @tree is not set, reclaim efficiency is recorded for @memcg, and
* only in-kernel users are notified.
*
* This function does not return any value.
*/
void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
#ifdef CONFIG_MEMCG
static void vmpressure_memcg(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
unsigned long scanned, unsigned long reclaimed)
{
struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
@ -281,6 +290,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
schedule_work(&vmpr->work);
} else {
enum vmpressure_levels level;
unsigned long pressure;
/* For now, no users for root-level efficiency */
if (!memcg || memcg == root_mem_cgroup)
@ -296,7 +306,8 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
vmpr->scanned = vmpr->reclaimed = 0;
spin_unlock(&vmpr->sr_lock);
level = vmpressure_calc_level(scanned, reclaimed);
pressure = vmpressure_calc_pressure(scanned, reclaimed);
level = vmpressure_level(pressure);
if (level > VMPRESSURE_LOW) {
/*
@ -311,6 +322,74 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
}
}
}
#else
/*
 * Variant built when CONFIG_MEMCG is not set: there is no per-memcg
 * accounting to do, so every argument is ignored and nothing happens.
 */
static void vmpressure_memcg(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
unsigned long scanned, unsigned long reclaimed)
{
}
#endif
/*
 * vmpressure_global() - account reclaim activity in the global window
 * @gfp:	reclaimer's gfp mask
 * @scanned:	number of pages scanned
 * @reclaimed:	number of pages reclaimed
 *
 * Adds @scanned and @reclaimed to the counters of global_vmpressure. Once
 * the accumulated scanned count reaches vmpressure_win, the counters are
 * reset and the raw pressure value computed by vmpressure_calc_pressure()
 * is delivered to the in-kernel notifier chain through vmpressure_notify().
 */
static void vmpressure_global(gfp_t gfp, unsigned long scanned,
		unsigned long reclaimed)
{
	struct vmpressure *vmpr = &global_vmpressure;
	unsigned long pressure;

	/* Samples whose gfp mask carries none of these bits are not counted. */
	if (!(gfp & (__GFP_HIGHMEM | __GFP_MOVABLE | __GFP_IO | __GFP_FS)))
		return;

	if (!scanned)
		return;

	/*
	 * Accumulate, test the window and reset the counters within a single
	 * critical section. Dropping the lock between the window check and
	 * the reset would let a concurrent caller add samples that are then
	 * zeroed without ever being reported, or let two callers both see a
	 * full window and notify twice for the same data.
	 */
	spin_lock(&vmpr->sr_lock);
	vmpr->scanned += scanned;
	vmpr->reclaimed += reclaimed;
	scanned = vmpr->scanned;
	reclaimed = vmpr->reclaimed;
	if (scanned < vmpressure_win) {
		spin_unlock(&vmpr->sr_lock);
		return;
	}
	vmpr->scanned = 0;
	vmpr->reclaimed = 0;
	spin_unlock(&vmpr->sr_lock);

	/* The notifier chain is run with sr_lock released. */
	pressure = vmpressure_calc_pressure(scanned, reclaimed);
	vmpressure_notify(pressure);
}
/**
 * vmpressure() - Account memory pressure through scanned/reclaimed ratio
 * @gfp:	reclaimer's gfp mask
 * @memcg:	cgroup memory controller handle
 * @tree:	legacy subtree mode
 * @scanned:	number of pages scanned
 * @reclaimed:	number of pages reclaimed
 *
 * Called from the vmscan reclaim path to account "instantaneous" memory
 * pressure (scanned/reclaimed ratio). The raw pressure index is then
 * further refined and averaged over time.
 *
 * When @tree is set and @memcg is NULL, the sample is additionally fed to
 * the global vmpressure instance, which serves the in-kernel notifier
 * chain.
 *
 * When CONFIG_MEMCG is enabled the sample is also handed to the memcg
 * accounting:
 *
 * With @tree set, vmpressure is in traditional userspace reporting mode:
 * @memcg is considered the pressure root and userspace is notified of the
 * entire subtree's reclaim efficiency.
 *
 * With @tree clear, reclaim efficiency is recorded for @memcg, and only
 * in-kernel users are notified.
 *
 * This function does not return any value.
 */
void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
		unsigned long scanned, unsigned long reclaimed)
{
	const bool global_sample = tree && !memcg;

	if (global_sample)
		vmpressure_global(gfp, scanned, reclaimed);

	if (!IS_ENABLED(CONFIG_MEMCG))
		return;

	vmpressure_memcg(gfp, memcg, tree, scanned, reclaimed);
}
/**
* vmpressure_prio() - Account memory pressure through reclaimer priority level
@ -470,3 +549,10 @@ void vmpressure_cleanup(struct vmpressure *vmpr)
*/
flush_work(&vmpr->work);
}
static int vmpressure_global_init(void)
{
vmpressure_init(&global_vmpressure);
return 0;
}
late_initcall(vmpressure_global_init);