f2dbcfa738
What's happening is that the assertion in mm/page_alloc.c:move_freepages() is triggering: BUG_ON(page_zone(start_page) != page_zone(end_page)); Once I knew this is what was happening, I added some annotations: if (unlikely(page_zone(start_page) != page_zone(end_page))) { printk(KERN_ERR "move_freepages: Bogus zones: " "start_page[%p] end_page[%p] zone[%p]\n", start_page, end_page, zone); printk(KERN_ERR "move_freepages: " "start_zone[%p] end_zone[%p]\n", page_zone(start_page), page_zone(end_page)); printk(KERN_ERR "move_freepages: " "start_pfn[0x%lx] end_pfn[0x%lx]\n", page_to_pfn(start_page), page_to_pfn(end_page)); printk(KERN_ERR "move_freepages: " "start_nid[%d] end_nid[%d]\n", page_to_nid(start_page), page_to_nid(end_page)); ... And here's what I got: move_freepages: Bogus zones: start_page[2207d0000] end_page[2207dffc0] zone[fffff8103effcb00] move_freepages: start_zone[fffff8103effcb00] end_zone[fffff8003fffeb00] move_freepages: start_pfn[0x81f600] end_pfn[0x81f7ff] move_freepages: start_nid[1] end_nid[0] My memory layout on this box is: [ 0.000000] Zone PFN ranges: [ 0.000000] Normal 0x00000000 -> 0x0081ff5d [ 0.000000] Movable zone start PFN for each node [ 0.000000] early_node_map[8] active PFN ranges [ 0.000000] 0: 0x00000000 -> 0x00020000 [ 0.000000] 1: 0x00800000 -> 0x0081f7ff [ 0.000000] 1: 0x0081f800 -> 0x0081fe50 [ 0.000000] 1: 0x0081fed1 -> 0x0081fed8 [ 0.000000] 1: 0x0081feda -> 0x0081fedb [ 0.000000] 1: 0x0081fedd -> 0x0081fee5 [ 0.000000] 1: 0x0081fee7 -> 0x0081ff51 [ 0.000000] 1: 0x0081ff59 -> 0x0081ff5d So it's a block move in that 0x81f600-->0x81f7ff region which triggers the problem. This patch: Declaration of early_pfn_to_nid() is scattered over per-arch include files, and it seems it's complicated to know when the declaration is used. I think it makes fix-for-memmap-init not easy. 
This patch moves all declarations to include/linux/mm.h. After this: if !CONFIG_ARCH_POPULATES_NODE_MAP && !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -> the static definition in include/linux/mm.h is used; else if !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -> the generic definition in mm/page_alloc.c is used; else -> the per-arch back end function is called. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Reported-by: David Miller <davem@davemlloft.net> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: <stable@kernel.org> [2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
136 lines
3.5 KiB
C
136 lines
3.5 KiB
C
/*
|
|
* Written by Pat Gaughen (gone@us.ibm.com) Mar 2002
|
|
*
|
|
*/
|
|
|
|
#ifndef _ASM_X86_MMZONE_32_H
|
|
#define _ASM_X86_MMZONE_32_H
|
|
|
|
#include <asm/smp.h>
|
|
|
|
#ifdef CONFIG_NUMA
|
|
/* Table of per-node pglist_data pointers; NODE_DATA() indexes it by node id. */
extern struct pglist_data *node_data[];
#define NODE_DATA(nid)		(node_data[(nid)])
|
|
|
|
#include <asm/numaq.h>
|
|
/* summit or generic arch */
|
|
#include <asm/srat.h>
|
|
|
|
extern int get_memcfg_numa_flat(void);
|
|
/*
 * Probe for a NUMA memory configuration: NUMA-Q first, then SRAT.  The
 * first probe that returns non-zero ends the search.  When neither does,
 * the flat configuration is used, so a kernel compiled for any one NUMA
 * architecture still has something to fall back on.
 */
static inline void get_memcfg_numa(void)
{
	/* && short-circuits, so SRAT is only consulted if NUMA-Q returned 0. */
	if (!get_memcfg_numaq() && !get_memcfg_from_srat())
		get_memcfg_numa_flat();
}
|
|
|
|
extern void resume_map_numa_kva(pgd_t *pgd);
|
|
|
|
#else /* !CONFIG_NUMA */
|
|
|
|
#define get_memcfg_numa get_memcfg_numa_flat
|
|
|
|
static inline void resume_map_numa_kva(pgd_t *pgd) {}
|
|
|
|
#endif /* CONFIG_NUMA */
|
|
|
|
#ifdef CONFIG_DISCONTIGMEM
|
|
|
|
/*
 * Generic node memory support.  Two assumptions are baked in:
 *
 *   1) memory comes in 64MB contiguous chunks, each either present or absent
 *   2) the whole system never holds more than 64GB of RAM
 *
 * With 4096-byte pages, 64GB works out to 16777216 pages.  Spreading those
 * over 1024 elements gives 16384 pages (one 64MB chunk) per element.
 */
#define MAX_NR_PAGES		16777216
#define MAX_ELEMENTS		1024
#define PAGES_PER_ELEMENT	(MAX_NR_PAGES / MAX_ELEMENTS)
|
|
|
|
extern s8 physnode_map[];
|
|
|
|
/*
 * Map a page frame number to a node id.
 *
 * With CONFIG_NUMA the id is read from physnode_map[], using the index of
 * the PAGES_PER_ELEMENT-sized element that contains @pfn.  Without
 * CONFIG_NUMA the result is always 0.
 */
static inline int pfn_to_nid(unsigned long pfn)
{
#ifndef CONFIG_NUMA
	return 0;
#else
	unsigned long element = pfn / PAGES_PER_ELEMENT;

	return (int)physnode_map[element];
#endif
}
|
|
|
|
/*
 * Accessors that each NUMA implementation must define.
 *
 * node_start_pfn(nid): the node_start_pfn field of node @nid.
 * node_end_pfn(nid):   node_start_pfn plus node_spanned_pages of node @nid;
 *                      @nid is evaluated only once.
 */
#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
#define node_end_pfn(nid)						\
({									\
	pg_data_t *__nd = NODE_DATA(nid);				\
	__nd->node_start_pfn + __nd->node_spanned_pages;		\
})
|
|
|
|
/*
 * pfn_valid - check a page frame number against the end of its node
 * @pfn: page frame number to check
 *
 * Returns non-zero when pfn_to_nid() yields a non-negative node id and
 * @pfn is below that node's node_end_pfn(); returns 0 otherwise.
 *
 * @pfn is taken as unsigned long, the same type pfn_to_nid() accepts, so
 * the value is not narrowed to a signed int on entry and the comparison
 * against node_end_pfn() no longer mixes signed and unsigned operands.
 */
static inline int pfn_valid(unsigned long pfn)
{
	int nid = pfn_to_nid(pfn);

	/* A negative id means no node was recorded for this pfn. */
	if (nid < 0)
		return 0;

	return pfn < node_end_pfn(nid);
}
|
|
|
|
#endif /* CONFIG_DISCONTIGMEM */
|
|
|
|
#ifdef CONFIG_NEED_MULTIPLE_NODES
|
|
|
|
/*
 * Bootmem wrappers specific to this NUMA platform.  Each one forwards to
 * the corresponding per-node primitive with NODE_DATA(0) as the node:
 *
 *   - plain variants align to SMP_CACHE_BYTES, *_pages variants to PAGE_SIZE
 *   - *_low variants pass a goal of 0, the others __pa(MAX_DMA_ADDRESS)
 *   - *_nopanic variants call __alloc_bootmem_node_nopanic()
 */
#define reserve_bootmem(addr, size, flags)				\
	reserve_bootmem_node(NODE_DATA(0), (addr), (size), (flags))

#define alloc_bootmem(x)						\
	__alloc_bootmem_node(NODE_DATA(0), (x),				\
			     SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))

#define alloc_bootmem_nopanic(x)					\
	__alloc_bootmem_node_nopanic(NODE_DATA(0), (x),			\
				     SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))

#define alloc_bootmem_low(x)						\
	__alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, 0)

#define alloc_bootmem_pages(x)						\
	__alloc_bootmem_node(NODE_DATA(0), (x),				\
			     PAGE_SIZE, __pa(MAX_DMA_ADDRESS))

#define alloc_bootmem_pages_nopanic(x)					\
	__alloc_bootmem_node_nopanic(NODE_DATA(0), (x),			\
				     PAGE_SIZE, __pa(MAX_DMA_ADDRESS))

#define alloc_bootmem_low_pages(x)					\
	__alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0)
|
|
/*
 * Node-taking bootmem wrappers.  The @pgdat argument is evaluated once and
 * stored in a struct pglist_data pointer (so its type is checked), but it
 * is otherwise unused: the allocation itself always goes to NODE_DATA(0).
 * NOTE(review): presumably only node 0's bootmem allocator is usable on
 * this platform at this point -- confirm against the boot code.
 */
#define alloc_bootmem_node(pgdat, x)					\
({									\
	struct pglist_data __maybe_unused *__ignored_pgdat = (pgdat);	\
	__alloc_bootmem_node(NODE_DATA(0), (x),				\
			     SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS));	\
})

#define alloc_bootmem_pages_node(pgdat, x)				\
({									\
	struct pglist_data __maybe_unused *__ignored_pgdat = (pgdat);	\
	__alloc_bootmem_node(NODE_DATA(0), (x),				\
			     PAGE_SIZE, __pa(MAX_DMA_ADDRESS));		\
})

#define alloc_bootmem_low_pages_node(pgdat, x)				\
({									\
	struct pglist_data __maybe_unused *__ignored_pgdat = (pgdat);	\
	__alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0);		\
})
|
|
#endif /* CONFIG_NEED_MULTIPLE_NODES */
|
|
|
|
#endif /* _ASM_X86_MMZONE_32_H */
|