memcg: memory hotplug fix for notifier callback
Fixes for memcg/memory hotplug. While memory hotplug allocate/free memmap, page_cgroup doesn't free page_cgroup at OFFLINE when page_cgroup is allocated via bootomem. (Because freeing bootmem requires special care.) Then, if page_cgroup is allocated by bootmem and memmap is freed/allocated by memory hotplug, page_cgroup->page == page is no longer true. But current MEM_ONLINE handler doesn't check it and update page_cgroup->page if it's not necessary to allocate page_cgroup. (This was not found because memmap is not freed if SPARSEMEM_VMEMMAP is y.) And I noticed that MEM_ONLINE can be called against "part of section". So, freeing page_cgroup at CANCEL_ONLINE will cause trouble. (freeing used page_cgroup) Don't rollback at CANCEL. One more, current memory hotplug notifier is stopped by slub because it sets NOTIFY_STOP_MASK to return vaule. So, page_cgroup's callback never be called. (low priority than slub now.) I think this slub's behavior is not intentional(BUG). and fixes it. Another way to be considered about page_cgroup allocation: - free page_cgroup at OFFLINE even if it's from bootmem and remove specieal handler. But it requires more changes. Addresses http://bugzilla.kernel.org/show_bug.cgi?id=12041 Signed-off-by: KAMEZAWA Hiruyoki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Pavel Emelyanov <xemul@openvz.org> Tested-by: Badari Pulavarty <pbadari@us.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
b29acbdcf8
commit
dc19f9db38
2 changed files with 33 additions and 16 deletions
|
@ -107,19 +107,29 @@ int __init_refok init_section_page_cgroup(unsigned long pfn)
|
|||
|
||||
section = __pfn_to_section(pfn);
|
||||
|
||||
if (section->page_cgroup)
|
||||
return 0;
|
||||
|
||||
nid = page_to_nid(pfn_to_page(pfn));
|
||||
|
||||
table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
|
||||
if (slab_is_available()) {
|
||||
base = kmalloc_node(table_size, GFP_KERNEL, nid);
|
||||
if (!base)
|
||||
base = vmalloc_node(table_size, nid);
|
||||
} else {
|
||||
base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), table_size,
|
||||
if (!section->page_cgroup) {
|
||||
nid = page_to_nid(pfn_to_page(pfn));
|
||||
table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
|
||||
if (slab_is_available()) {
|
||||
base = kmalloc_node(table_size, GFP_KERNEL, nid);
|
||||
if (!base)
|
||||
base = vmalloc_node(table_size, nid);
|
||||
} else {
|
||||
base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
|
||||
table_size,
|
||||
PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* We don't have to allocate page_cgroup again, but
|
||||
* address of memmap may be changed. So, we have to initialize
|
||||
* again.
|
||||
*/
|
||||
base = section->page_cgroup + pfn;
|
||||
table_size = 0;
|
||||
/* check address of memmap is changed or not. */
|
||||
if (base->page == pfn_to_page(pfn))
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!base) {
|
||||
|
@ -208,18 +218,23 @@ static int __meminit page_cgroup_callback(struct notifier_block *self,
|
|||
ret = online_page_cgroup(mn->start_pfn,
|
||||
mn->nr_pages, mn->status_change_nid);
|
||||
break;
|
||||
case MEM_CANCEL_ONLINE:
|
||||
case MEM_OFFLINE:
|
||||
offline_page_cgroup(mn->start_pfn,
|
||||
mn->nr_pages, mn->status_change_nid);
|
||||
break;
|
||||
case MEM_CANCEL_ONLINE:
|
||||
case MEM_GOING_OFFLINE:
|
||||
break;
|
||||
case MEM_ONLINE:
|
||||
case MEM_CANCEL_OFFLINE:
|
||||
break;
|
||||
}
|
||||
ret = notifier_from_errno(ret);
|
||||
|
||||
if (ret)
|
||||
ret = notifier_from_errno(ret);
|
||||
else
|
||||
ret = NOTIFY_OK;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -2931,8 +2931,10 @@ static int slab_memory_callback(struct notifier_block *self,
|
|||
case MEM_CANCEL_OFFLINE:
|
||||
break;
|
||||
}
|
||||
|
||||
ret = notifier_from_errno(ret);
|
||||
if (ret)
|
||||
ret = notifier_from_errno(ret);
|
||||
else
|
||||
ret = NOTIFY_OK;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue