357a3cfb14
During bootup Xen supplies us with a P2M array. It sticks it right after the ramdisk, as can be seen with a 128GB PV guest: (certain parts removed for clarity): xc_dom_build_image: called xc_dom_alloc_segment: kernel : 0xffffffff81000000 -> 0xffffffff81e43000 (pfn 0x1000 + 0xe43 pages) xc_dom_pfn_to_ptr: domU mapping: pfn 0x1000+0xe43 at 0x7f097d8bf000 xc_dom_alloc_segment: ramdisk : 0xffffffff81e43000 -> 0xffffffff925c7000 (pfn 0x1e43 + 0x10784 pages) xc_dom_pfn_to_ptr: domU mapping: pfn 0x1e43+0x10784 at 0x7f0952dd2000 xc_dom_alloc_segment: phys2mach : 0xffffffff925c7000 -> 0xffffffffa25c7000 (pfn 0x125c7 + 0x10000 pages) xc_dom_pfn_to_ptr: domU mapping: pfn 0x125c7+0x10000 at 0x7f0942dd2000 xc_dom_alloc_page : start info : 0xffffffffa25c7000 (pfn 0x225c7) xc_dom_alloc_page : xenstore : 0xffffffffa25c8000 (pfn 0x225c8) xc_dom_alloc_page : console : 0xffffffffa25c9000 (pfn 0x225c9) nr_page_tables: 0x0000ffffffffffff/48: 0xffff000000000000 -> 0xffffffffffffffff, 1 table(s) nr_page_tables: 0x0000007fffffffff/39: 0xffffff8000000000 -> 0xffffffffffffffff, 1 table(s) nr_page_tables: 0x000000003fffffff/30: 0xffffffff80000000 -> 0xffffffffbfffffff, 1 table(s) nr_page_tables: 0x00000000001fffff/21: 0xffffffff80000000 -> 0xffffffffa27fffff, 276 table(s) xc_dom_alloc_segment: page tables : 0xffffffffa25ca000 -> 0xffffffffa26e1000 (pfn 0x225ca + 0x117 pages) xc_dom_pfn_to_ptr: domU mapping: pfn 0x225ca+0x117 at 0x7f097d7a8000 xc_dom_alloc_page : boot stack : 0xffffffffa26e1000 (pfn 0x226e1) xc_dom_build_image : virt_alloc_end : 0xffffffffa26e2000 xc_dom_build_image : virt_pgtab_end : 0xffffffffa2800000 So the physical memory and virtual (using __START_KERNEL_map addresses) layout looks as so: phys __ka /------------\ /-------------------\ | 0 | empty | 0xffffffff80000000| | .. | | .. | | 16MB | <= kernel starts | 0xffffffff81000000| | .. | | | | 30MB | <= kernel ends => | 0xffffffff81e43000| | .. | & ramdisk starts | .. | | 293MB | <= ramdisk ends=> | 0xffffffff925c7000| | .. | & P2M starts | .. | | .. | | .. | | 549MB | <= P2M ends => | 0xffffffffa25c7000| | .. | start_info | 0xffffffffa25c7000| | .. | xenstore | 0xffffffffa25c8000| | .. | cosole | 0xffffffffa25c9000| | 549MB | <= page tables => | 0xffffffffa25ca000| | .. | | | | 550MB | <= PGT end => | 0xffffffffa26e1000| | .. | boot stack | | \------------/ \-------------------/ As can be seen, the ramdisk, P2M and pagetables are taking a bit of __ka addresses space. Which is a problem since the MODULES_VADDR starts at 0xffffffffa0000000 - and P2M sits right in there! This results during bootup with the inability to load modules, with this error: ------------[ cut here ]------------ WARNING: at /home/konrad/ssd/linux/mm/vmalloc.c:106 vmap_page_range_noflush+0x2d9/0x370() Call Trace: [<ffffffff810719fa>] warn_slowpath_common+0x7a/0xb0 [<ffffffff81030279>] ? __raw_callee_save_xen_pmd_val+0x11/0x1e [<ffffffff81071a45>] warn_slowpath_null+0x15/0x20 [<ffffffff81130b89>] vmap_page_range_noflush+0x2d9/0x370 [<ffffffff81130c4d>] map_vm_area+0x2d/0x50 [<ffffffff811326d0>] __vmalloc_node_range+0x160/0x250 [<ffffffff810c5369>] ? module_alloc_update_bounds+0x19/0x80 [<ffffffff810c6186>] ? load_module+0x66/0x19c0 [<ffffffff8105cadc>] module_alloc+0x5c/0x60 [<ffffffff810c5369>] ? module_alloc_update_bounds+0x19/0x80 [<ffffffff810c5369>] module_alloc_update_bounds+0x19/0x80 [<ffffffff810c70c3>] load_module+0xfa3/0x19c0 [<ffffffff812491f6>] ? security_file_permission+0x86/0x90 [<ffffffff810c7b3a>] sys_init_module+0x5a/0x220 [<ffffffff815ce339>] system_call_fastpath+0x16/0x1b ---[ end trace fd8f7704fdea0291 ]--- vmalloc: allocation failure, allocated 16384 of 20480 bytes modprobe: page allocation failure: order:0, mode:0xd2 Since the __va and __ka are 1:1 up to MODULES_VADDR and cleanup_highmap rids __ka of the ramdisk mapping, what we want to do is similar - get rid of the P2M in the __ka address space. There are two ways of fixing this: 1) All P2M lookups instead of using the __ka address would use the __va address. This means we can safely erase from __ka space the PMD pointers that point to the PFNs for P2M array and be OK. 2). Allocate a new array, copy the existing P2M into it, revector the P2M tree to use that, and return the old P2M to the memory allocate. This has the advantage that it sets the stage for using XEN_ELF_NOTE_INIT_P2M feature. That feature allows us to set the exact virtual address space we want for the P2M - and allows us to boot as initial domain on large machines. So we pick option 2). This patch only lays the groundwork in the P2M code. The patch that modifies the MMU is called "xen/mmu: Copy and revector the P2M tree." Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
127 lines
3.3 KiB
C
127 lines
3.3 KiB
C
#ifndef XEN_OPS_H
|
|
#define XEN_OPS_H
|
|
|
|
#include <linux/init.h>
|
|
#include <linux/clocksource.h>
|
|
#include <linux/irqreturn.h>
|
|
#include <xen/xen-ops.h>
|
|
|
|
/* These are code, but not functions. Defined in entry.S */
|
|
extern const char xen_hypervisor_callback[];
|
|
extern const char xen_failsafe_callback[];
|
|
|
|
extern void *xen_initial_gdt;
|
|
|
|
struct trap_info;
|
|
void xen_copy_trap_info(struct trap_info *traps);
|
|
|
|
DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info);
|
|
DECLARE_PER_CPU(unsigned long, xen_cr3);
|
|
DECLARE_PER_CPU(unsigned long, xen_current_cr3);
|
|
|
|
extern struct start_info *xen_start_info;
|
|
extern struct shared_info xen_dummy_shared_info;
|
|
extern struct shared_info *HYPERVISOR_shared_info;
|
|
|
|
void xen_setup_mfn_list_list(void);
|
|
void xen_setup_shared_info(void);
|
|
void xen_build_mfn_list_list(void);
|
|
void xen_setup_machphys_mapping(void);
|
|
void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
|
|
void xen_reserve_top(void);
|
|
extern unsigned long xen_max_p2m_pfn;
|
|
|
|
void xen_set_pat(u64);
|
|
|
|
char * __init xen_memory_setup(void);
|
|
void __init xen_arch_setup(void);
|
|
void __init xen_init_IRQ(void);
|
|
void xen_enable_sysenter(void);
|
|
void xen_enable_syscall(void);
|
|
void xen_vcpu_restore(void);
|
|
|
|
void xen_callback_vector(void);
|
|
void xen_hvm_init_shared_info(void);
|
|
void xen_unplug_emulated_devices(void);
|
|
|
|
void __init xen_build_dynamic_phys_to_machine(void);
|
|
unsigned long __init xen_revector_p2m_tree(void);
|
|
|
|
void xen_init_irq_ops(void);
|
|
void xen_setup_timer(int cpu);
|
|
void xen_setup_runstate_info(int cpu);
|
|
void xen_teardown_timer(int cpu);
|
|
cycle_t xen_clocksource_read(void);
|
|
void xen_setup_cpu_clockevents(void);
|
|
void __init xen_init_time_ops(void);
|
|
void __init xen_hvm_init_time_ops(void);
|
|
|
|
irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
|
|
|
|
bool xen_vcpu_stolen(int vcpu);
|
|
|
|
void xen_setup_vcpu_info_placement(void);
|
|
|
|
#ifdef CONFIG_SMP
|
|
void xen_smp_init(void);
|
|
void __init xen_hvm_smp_init(void);
|
|
|
|
extern cpumask_var_t xen_cpu_initialized_map;
|
|
#else
|
|
static inline void xen_smp_init(void) {}
|
|
static inline void xen_hvm_smp_init(void) {}
|
|
#endif
|
|
|
|
#ifdef CONFIG_PARAVIRT_SPINLOCKS
|
|
void __init xen_init_spinlocks(void);
|
|
void __cpuinit xen_init_lock_cpu(int cpu);
|
|
void xen_uninit_lock_cpu(int cpu);
|
|
#else
|
|
static inline void xen_init_spinlocks(void)
|
|
{
|
|
}
|
|
static inline void xen_init_lock_cpu(int cpu)
|
|
{
|
|
}
|
|
static inline void xen_uninit_lock_cpu(int cpu)
|
|
{
|
|
}
|
|
#endif
|
|
|
|
struct dom0_vga_console_info;
|
|
|
|
#ifdef CONFIG_XEN_DOM0
|
|
void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size);
|
|
void __init xen_init_apic(void);
|
|
#else
|
|
static inline void __init xen_init_vga(const struct dom0_vga_console_info *info,
|
|
size_t size)
|
|
{
|
|
}
|
|
static inline void __init xen_init_apic(void)
|
|
{
|
|
}
|
|
#endif
|
|
|
|
/* Declare an asm function, along with symbols needed to make it
|
|
inlineable */
|
|
#define DECL_ASM(ret, name, ...) \
|
|
ret name(__VA_ARGS__); \
|
|
extern char name##_end[]; \
|
|
extern char name##_reloc[] \
|
|
|
|
DECL_ASM(void, xen_irq_enable_direct, void);
|
|
DECL_ASM(void, xen_irq_disable_direct, void);
|
|
DECL_ASM(unsigned long, xen_save_fl_direct, void);
|
|
DECL_ASM(void, xen_restore_fl_direct, unsigned long);
|
|
|
|
/* These are not functions, and cannot be called normally */
|
|
void xen_iret(void);
|
|
void xen_sysexit(void);
|
|
void xen_sysret32(void);
|
|
void xen_sysret64(void);
|
|
void xen_adjust_exception_frame(void);
|
|
|
|
extern int xen_panic_handler_init(void);
|
|
|
|
#endif /* XEN_OPS_H */
|