x86, kexec: x86_64: add kexec jump support for x86_64
Impact: New major feature. This patch adds kexec jump support for x86_64. More information about kexec jump can be found in the corresponding x86_32 support patch. Signed-off-by: Huang Ying <ying.huang@intel.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
This commit is contained in:
parent
5359454701
commit
fee7b0d84c
5 changed files with 198 additions and 45 deletions
|
@ -1431,7 +1431,7 @@ config CRASH_DUMP
|
||||||
config KEXEC_JUMP
|
config KEXEC_JUMP
|
||||||
bool "kexec jump (EXPERIMENTAL)"
|
bool "kexec jump (EXPERIMENTAL)"
|
||||||
depends on EXPERIMENTAL
|
depends on EXPERIMENTAL
|
||||||
depends on KEXEC && HIBERNATION && X86_32
|
depends on KEXEC && HIBERNATION
|
||||||
---help---
|
---help---
|
||||||
Jump between original kernel and kexeced kernel and invoke
|
Jump between original kernel and kexeced kernel and invoke
|
||||||
code in physical address mode via KEXEC
|
code in physical address mode via KEXEC
|
||||||
|
|
|
@ -9,13 +9,13 @@
|
||||||
# define PAGES_NR 4
|
# define PAGES_NR 4
|
||||||
#else
|
#else
|
||||||
# define PA_CONTROL_PAGE 0
|
# define PA_CONTROL_PAGE 0
|
||||||
# define PA_TABLE_PAGE 1
|
# define VA_CONTROL_PAGE 1
|
||||||
# define PAGES_NR 2
|
# define PA_TABLE_PAGE 2
|
||||||
|
# define PA_SWAP_PAGE 3
|
||||||
|
# define PAGES_NR 4
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_X86_32
|
|
||||||
# define KEXEC_CONTROL_CODE_MAX_SIZE 2048
|
# define KEXEC_CONTROL_CODE_MAX_SIZE 2048
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __ASSEMBLY__
|
#ifndef __ASSEMBLY__
|
||||||
|
|
||||||
|
@ -136,10 +136,11 @@ relocate_kernel(unsigned long indirection_page,
|
||||||
unsigned int has_pae,
|
unsigned int has_pae,
|
||||||
unsigned int preserve_context);
|
unsigned int preserve_context);
|
||||||
#else
|
#else
|
||||||
NORET_TYPE void
|
unsigned long
|
||||||
relocate_kernel(unsigned long indirection_page,
|
relocate_kernel(unsigned long indirection_page,
|
||||||
unsigned long page_list,
|
unsigned long page_list,
|
||||||
unsigned long start_address) ATTRIB_NORET;
|
unsigned long start_address,
|
||||||
|
unsigned int preserve_context);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define ARCH_HAS_KIMAGE_ARCH
|
#define ARCH_HAS_KIMAGE_ARCH
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
#include <linux/numa.h>
|
#include <linux/numa.h>
|
||||||
#include <linux/ftrace.h>
|
#include <linux/ftrace.h>
|
||||||
#include <linux/io.h>
|
#include <linux/io.h>
|
||||||
|
#include <linux/suspend.h>
|
||||||
|
|
||||||
#include <asm/pgtable.h>
|
#include <asm/pgtable.h>
|
||||||
#include <asm/tlbflush.h>
|
#include <asm/tlbflush.h>
|
||||||
|
@ -270,19 +271,43 @@ void machine_kexec(struct kimage *image)
|
||||||
{
|
{
|
||||||
unsigned long page_list[PAGES_NR];
|
unsigned long page_list[PAGES_NR];
|
||||||
void *control_page;
|
void *control_page;
|
||||||
|
int save_ftrace_enabled;
|
||||||
|
|
||||||
tracer_disable();
|
#ifdef CONFIG_KEXEC_JUMP
|
||||||
|
if (kexec_image->preserve_context)
|
||||||
|
save_processor_state();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
save_ftrace_enabled = __ftrace_enabled_save();
|
||||||
|
|
||||||
/* Interrupts aren't acceptable while we reboot */
|
/* Interrupts aren't acceptable while we reboot */
|
||||||
local_irq_disable();
|
local_irq_disable();
|
||||||
|
|
||||||
|
if (image->preserve_context) {
|
||||||
|
#ifdef CONFIG_X86_IO_APIC
|
||||||
|
/*
|
||||||
|
* We need to put APICs in legacy mode so that we can
|
||||||
|
* get timer interrupts in second kernel. kexec/kdump
|
||||||
|
* paths already have calls to disable_IO_APIC() in
|
||||||
|
* one form or other. kexec jump path also needs
|
||||||
|
* one.
|
||||||
|
*/
|
||||||
|
disable_IO_APIC();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
control_page = page_address(image->control_code_page) + PAGE_SIZE;
|
control_page = page_address(image->control_code_page) + PAGE_SIZE;
|
||||||
memcpy(control_page, relocate_kernel, PAGE_SIZE);
|
memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
|
||||||
|
|
||||||
page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
|
page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
|
||||||
|
page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
|
||||||
page_list[PA_TABLE_PAGE] =
|
page_list[PA_TABLE_PAGE] =
|
||||||
(unsigned long)__pa(page_address(image->control_code_page));
|
(unsigned long)__pa(page_address(image->control_code_page));
|
||||||
|
|
||||||
|
if (image->type == KEXEC_TYPE_DEFAULT)
|
||||||
|
page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
|
||||||
|
<< PAGE_SHIFT);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The segment registers are funny things, they have both a
|
* The segment registers are funny things, they have both a
|
||||||
* visible and an invisible part. Whenever the visible part is
|
* visible and an invisible part. Whenever the visible part is
|
||||||
|
@ -302,8 +327,17 @@ void machine_kexec(struct kimage *image)
|
||||||
set_idt(phys_to_virt(0), 0);
|
set_idt(phys_to_virt(0), 0);
|
||||||
|
|
||||||
/* now call it */
|
/* now call it */
|
||||||
relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
|
image->start = relocate_kernel((unsigned long)image->head,
|
||||||
image->start);
|
(unsigned long)page_list,
|
||||||
|
image->start,
|
||||||
|
image->preserve_context);
|
||||||
|
|
||||||
|
#ifdef CONFIG_KEXEC_JUMP
|
||||||
|
if (kexec_image->preserve_context)
|
||||||
|
restore_processor_state();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__ftrace_enabled_restore(save_ftrace_enabled);
|
||||||
}
|
}
|
||||||
|
|
||||||
void arch_crash_save_vmcoreinfo(void)
|
void arch_crash_save_vmcoreinfo(void)
|
||||||
|
|
|
@ -19,6 +19,24 @@
|
||||||
#define PTR(x) (x << 3)
|
#define PTR(x) (x << 3)
|
||||||
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
|
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
|
||||||
|
* ~ control_page + PAGE_SIZE are used as data storage and stack for
|
||||||
|
* jumping back
|
||||||
|
*/
|
||||||
|
#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
|
||||||
|
|
||||||
|
/* Minimal CPU state */
|
||||||
|
#define RSP DATA(0x0)
|
||||||
|
#define CR0 DATA(0x8)
|
||||||
|
#define CR3 DATA(0x10)
|
||||||
|
#define CR4 DATA(0x18)
|
||||||
|
|
||||||
|
/* other data */
|
||||||
|
#define CP_PA_TABLE_PAGE DATA(0x20)
|
||||||
|
#define CP_PA_SWAP_PAGE DATA(0x28)
|
||||||
|
#define CP_PA_BACKUP_PAGES_MAP DATA(0x30)
|
||||||
|
|
||||||
.text
|
.text
|
||||||
.align PAGE_SIZE
|
.align PAGE_SIZE
|
||||||
.code64
|
.code64
|
||||||
|
@ -28,8 +46,27 @@ relocate_kernel:
|
||||||
* %rdi indirection_page
|
* %rdi indirection_page
|
||||||
* %rsi page_list
|
* %rsi page_list
|
||||||
* %rdx start address
|
* %rdx start address
|
||||||
|
* %rcx preserve_context
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* Save the CPU context, used for jumping back */
|
||||||
|
pushq %rbx
|
||||||
|
pushq %rbp
|
||||||
|
pushq %r12
|
||||||
|
pushq %r13
|
||||||
|
pushq %r14
|
||||||
|
pushq %r15
|
||||||
|
pushf
|
||||||
|
|
||||||
|
movq PTR(VA_CONTROL_PAGE)(%rsi), %r11
|
||||||
|
movq %rsp, RSP(%r11)
|
||||||
|
movq %cr0, %rax
|
||||||
|
movq %rax, CR0(%r11)
|
||||||
|
movq %cr3, %rax
|
||||||
|
movq %rax, CR3(%r11)
|
||||||
|
movq %cr4, %rax
|
||||||
|
movq %rax, CR4(%r11)
|
||||||
|
|
||||||
/* zero out flags, and disable interrupts */
|
/* zero out flags, and disable interrupts */
|
||||||
pushq $0
|
pushq $0
|
||||||
popfq
|
popfq
|
||||||
|
@ -41,10 +78,18 @@ relocate_kernel:
|
||||||
movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
|
movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
|
||||||
|
|
||||||
/* get physical address of page table now too */
|
/* get physical address of page table now too */
|
||||||
movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
|
movq PTR(PA_TABLE_PAGE)(%rsi), %r9
|
||||||
|
|
||||||
|
/* get physical address of swap page now */
|
||||||
|
movq PTR(PA_SWAP_PAGE)(%rsi), %r10
|
||||||
|
|
||||||
|
/* save some information for jumping back */
|
||||||
|
movq %r9, CP_PA_TABLE_PAGE(%r11)
|
||||||
|
movq %r10, CP_PA_SWAP_PAGE(%r11)
|
||||||
|
movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
|
||||||
|
|
||||||
/* Switch to the identity mapped page tables */
|
/* Switch to the identity mapped page tables */
|
||||||
movq %rcx, %cr3
|
movq %r9, %cr3
|
||||||
|
|
||||||
/* setup a new stack at the end of the physical control page */
|
/* setup a new stack at the end of the physical control page */
|
||||||
lea PAGE_SIZE(%r8), %rsp
|
lea PAGE_SIZE(%r8), %rsp
|
||||||
|
@ -83,9 +128,87 @@ identity_mapped:
|
||||||
1:
|
1:
|
||||||
|
|
||||||
/* Flush the TLB (needed?) */
|
/* Flush the TLB (needed?) */
|
||||||
movq %rcx, %cr3
|
movq %r9, %cr3
|
||||||
|
|
||||||
|
movq %rcx, %r11
|
||||||
|
call swap_pages
|
||||||
|
|
||||||
|
/*
|
||||||
|
* To be certain of avoiding problems with self-modifying code
|
||||||
|
* I need to execute a serializing instruction here.
|
||||||
|
* So I flush the TLB by reloading %cr3 here, it's handy,
|
||||||
|
* and not processor dependent.
|
||||||
|
*/
|
||||||
|
movq %cr3, %rax
|
||||||
|
movq %rax, %cr3
|
||||||
|
|
||||||
|
/*
|
||||||
|
* set all of the registers to known values
|
||||||
|
* leave %rsp alone
|
||||||
|
*/
|
||||||
|
|
||||||
|
testq %r11, %r11
|
||||||
|
jnz 1f
|
||||||
|
xorq %rax, %rax
|
||||||
|
xorq %rbx, %rbx
|
||||||
|
xorq %rcx, %rcx
|
||||||
|
xorq %rdx, %rdx
|
||||||
|
xorq %rsi, %rsi
|
||||||
|
xorq %rdi, %rdi
|
||||||
|
xorq %rbp, %rbp
|
||||||
|
xorq %r8, %r8
|
||||||
|
xorq %r9, %r9
|
||||||
|
xorq %r10, %r10 /* zero %r10 itself (AT&T order: destination is the second operand) */
|
||||||
|
xorq %r11, %r11
|
||||||
|
xorq %r12, %r12
|
||||||
|
xorq %r13, %r13
|
||||||
|
xorq %r14, %r14
|
||||||
|
xorq %r15, %r15
|
||||||
|
|
||||||
|
ret
|
||||||
|
|
||||||
|
1:
|
||||||
|
popq %rdx
|
||||||
|
leaq PAGE_SIZE(%r10), %rsp
|
||||||
|
call *%rdx
|
||||||
|
|
||||||
|
/* get the re-entry point of the peer system */
|
||||||
|
movq 0(%rsp), %rbp
|
||||||
|
call 1f
|
||||||
|
1:
|
||||||
|
popq %r8
|
||||||
|
subq $(1b - relocate_kernel), %r8
|
||||||
|
movq CP_PA_SWAP_PAGE(%r8), %r10
|
||||||
|
movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
|
||||||
|
movq CP_PA_TABLE_PAGE(%r8), %rax
|
||||||
|
movq %rax, %cr3
|
||||||
|
lea PAGE_SIZE(%r8), %rsp
|
||||||
|
call swap_pages
|
||||||
|
movq $virtual_mapped, %rax
|
||||||
|
pushq %rax
|
||||||
|
ret
|
||||||
|
|
||||||
|
virtual_mapped:
|
||||||
|
movq RSP(%r8), %rsp
|
||||||
|
movq CR4(%r8), %rax
|
||||||
|
movq %rax, %cr4
|
||||||
|
movq CR3(%r8), %rax
|
||||||
|
movq CR0(%r8), %r8
|
||||||
|
movq %rax, %cr3
|
||||||
|
movq %r8, %cr0
|
||||||
|
movq %rbp, %rax
|
||||||
|
|
||||||
|
popf
|
||||||
|
popq %r15
|
||||||
|
popq %r14
|
||||||
|
popq %r13
|
||||||
|
popq %r12
|
||||||
|
popq %rbp
|
||||||
|
popq %rbx
|
||||||
|
ret
|
||||||
|
|
||||||
/* Do the copies */
|
/* Do the copies */
|
||||||
|
swap_pages:
|
||||||
movq %rdi, %rcx /* Put the page_list in %rcx */
|
movq %rdi, %rcx /* Put the page_list in %rcx */
|
||||||
xorq %rdi, %rdi
|
xorq %rdi, %rdi
|
||||||
xorq %rsi, %rsi
|
xorq %rsi, %rsi
|
||||||
|
@ -117,39 +240,27 @@ identity_mapped:
|
||||||
movq %rcx, %rsi /* For ever source page do a copy */
|
movq %rcx, %rsi /* For ever source page do a copy */
|
||||||
andq $0xfffffffffffff000, %rsi
|
andq $0xfffffffffffff000, %rsi
|
||||||
|
|
||||||
|
movq %rdi, %rdx
|
||||||
|
movq %rsi, %rax
|
||||||
|
|
||||||
|
movq %r10, %rdi
|
||||||
movq $512, %rcx
|
movq $512, %rcx
|
||||||
rep ; movsq
|
rep ; movsq
|
||||||
|
|
||||||
|
movq %rax, %rdi
|
||||||
|
movq %rdx, %rsi
|
||||||
|
movq $512, %rcx
|
||||||
|
rep ; movsq
|
||||||
|
|
||||||
|
movq %rdx, %rdi
|
||||||
|
movq %r10, %rsi
|
||||||
|
movq $512, %rcx
|
||||||
|
rep ; movsq
|
||||||
|
|
||||||
|
lea PAGE_SIZE(%rax), %rsi
|
||||||
jmp 0b
|
jmp 0b
|
||||||
3:
|
3:
|
||||||
|
|
||||||
/*
|
|
||||||
* To be certain of avoiding problems with self-modifying code
|
|
||||||
* I need to execute a serializing instruction here.
|
|
||||||
* So I flush the TLB by reloading %cr3 here, it's handy,
|
|
||||||
* and not processor dependent.
|
|
||||||
*/
|
|
||||||
movq %cr3, %rax
|
|
||||||
movq %rax, %cr3
|
|
||||||
|
|
||||||
/*
|
|
||||||
* set all of the registers to known values
|
|
||||||
* leave %rsp alone
|
|
||||||
*/
|
|
||||||
|
|
||||||
xorq %rax, %rax
|
|
||||||
xorq %rbx, %rbx
|
|
||||||
xorq %rcx, %rcx
|
|
||||||
xorq %rdx, %rdx
|
|
||||||
xorq %rsi, %rsi
|
|
||||||
xorq %rdi, %rdi
|
|
||||||
xorq %rbp, %rbp
|
|
||||||
xorq %r8, %r8
|
|
||||||
xorq %r9, %r9
|
|
||||||
xorq %r10, %r9
|
|
||||||
xorq %r11, %r11
|
|
||||||
xorq %r12, %r12
|
|
||||||
xorq %r13, %r13
|
|
||||||
xorq %r14, %r14
|
|
||||||
xorq %r15, %r15
|
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
.globl kexec_control_code_size
|
||||||
|
.set kexec_control_code_size, . - relocate_kernel
|
||||||
|
|
|
@ -275,3 +275,10 @@ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
|
||||||
ASSERT((per_cpu__irq_stack_union == 0),
|
ASSERT((per_cpu__irq_stack_union == 0),
|
||||||
"irq_stack_union is not at start of per-cpu area");
|
"irq_stack_union is not at start of per-cpu area");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_KEXEC
|
||||||
|
#include <asm/kexec.h>
|
||||||
|
|
||||||
|
ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
|
||||||
|
"kexec control code size is too big")
|
||||||
|
#endif
|
||||||
|
|
Loading…
Add table
Reference in a new issue