kernel-fxtec-pro1x/arch/x86/boot/compressed/head_64.S
H. Peter Anvin 36d3793c94 x86, boot: use appropriate rep string for move and clear
In the pre-decompression code, use the appropriate largest possible
rep movs and rep stos to move code and clear bss, respectively.  For
reverse copy, do note that the initial values are supposed to be the
address of the first (highest) copy datum, not one byte beyond the end
of the buffer.

rep strings are not necessarily the fastest way to perform these
operations on all current processors, but are likely to be in the
future, and perhaps more importantly, we want to encourage the
architecturally right thing to do here.

This also fixes a couple of trivial inefficiencies on 64 bits.

[ Impact: trivial performance enhancement, increase code similarity ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2009-05-08 17:34:52 -07:00

339 lines
7.7 KiB
ArmAsm

/*
* linux/boot/head.S
*
* Copyright (C) 1991, 1992, 1993 Linus Torvalds
*/
/*
* head.S contains the 32-bit startup code.
*
* NOTE!!! Startup happens at absolute address 0x00001000, which is also where
* the page directory will exist. The startup code will be overwritten by
* the page directory. [According to comments etc elsewhere on a compressed
* kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
*
* Page 0 is deliberately kept safe, since System Management Mode code in
* laptops may need to access the BIOS data stored there. This is also
* useful for future device drivers that access the BIOS via VM86
* mode.
*/
/*
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
*/
.code32
.text
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/pgtable_types.h>
#include <asm/page_types.h>
#include <asm/boot.h>
#include <asm/msr.h>
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
.section ".text.head"
.code32
/*
* startup_32 - 32-bit entry point of the compressed kernel image.
*
* %esi is used as the base of the boot parameter block: BP_loadflags
* is tested and the BP_scratch field is borrowed as a tiny stack.
*
* Steps performed, in order:
*   1. Reload %ds/%es/%ss unless the KEEP_SEGMENTS load flag is set.
*   2. Compute the load delta into %ebp.
*   3. Switch to boot_stack_end and call verify_cpu; hang in
*      no_longmode if it returns non-zero.
*   4. Compute into %ebx the address the image will later be moved to.
*   5. Load the GDT, enable PAE and build the 4G boot page tables at
*      pgtable(%ebx).
*   6. Set EFER.LME, enable paging and far-return into startup_64.
*
* Register roles after step 4 (live until the lret):
*   %ebp - load delta (runtime address minus link address)
*   %ebx - destination address for the relocated image
*/
ENTRY(startup_32)
cld
/*
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
* us to not reload segments
*/
testb $(1<<6), BP_loadflags(%esi)
jnz 1f
cli
movl $(__KERNEL_DS), %eax
movl %eax, %ds
movl %eax, %es
movl %eax, %ss
1:
/*
* Calculate the delta between where we were compiled to run
* at and where we were actually loaded at. This can only be done
* with a short local call on x86. Nothing else will tell us what
* address we are running at. The reserved chunk of the real-mode
* data at 0x1e4 (defined as a scratch field) is used as the stack
* for this calculation. Only 4 bytes are needed.
*
* The call pushes the runtime address of the following label; popping
* it and subtracting the link-time address of that same label leaves
* the delta in %ebp.
*/
leal (BP_scratch+4)(%esi), %esp
call 1f
1: popl %ebp
subl $1b, %ebp
/*
* Set up a stack and make sure the cpu supports long mode.
* boot_stack_end is a link-time address, so the delta in %ebp is
* added to obtain its runtime address.
*/
movl $boot_stack_end, %eax
addl %ebp, %eax
movl %eax, %esp
call verify_cpu
/* A non-zero return value in %eax means long mode is unavailable. */
testl %eax, %eax
jnz no_longmode
/*
* Compute the delta between where we were compiled to run at
* and where the code will actually run at.
*
* %ebp contains the address we are loaded at by the boot loader and %ebx
* contains the address where we should move the kernel image temporarily
* for safe in-place decompression.
*
* With CONFIG_RELOCATABLE the starting point is %ebp rounded up to a
* PMD_PAGE_SIZE boundary; otherwise it is CONFIG_PHYSICAL_START.
*/
#ifdef CONFIG_RELOCATABLE
movl %ebp, %ebx
addl $(PMD_PAGE_SIZE -1), %ebx
andl $PMD_PAGE_MASK, %ebx
#else
movl $CONFIG_PHYSICAL_START, %ebx
#endif
/* Replace the compressed data size with the uncompressed size */
subl input_len(%ebp), %ebx
movl output_len(%ebp), %eax
addl %eax, %ebx
/*
* Add 8 bytes for every 32K input block
* (8 / 32768 == 1 / 4096, hence the shift by 12).
*/
shrl $12, %eax
addl %eax, %ebx
/* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
addl $(32768 + 18 + 4095), %ebx
andl $~4095, %ebx
/*
* Prepare for entering 64 bit mode
*/
/*
* Load new GDT with the 64bit segments using 32bit descriptor.
* The base field at gdt+2 holds a link-time address, so it is
* overwritten with the runtime address of gdt before lgdt reads it.
*/
leal gdt(%ebp), %eax
movl %eax, gdt+2(%ebp)
lgdt gdt(%ebp)
/*
* Enable PAE mode.
* Note: %cr4 is written with X86_CR4_PAE as its only set bit.
*/
xorl %eax, %eax
orl $(X86_CR4_PAE), %eax
movl %eax, %cr4
/*
* Build early 4G boot pagetable
*
* The tables live at pgtable(%ebx), i.e. relative to the relocation
* target in %ebx rather than the current load address in %ebp.
* Layout: one level 4 page at +0, one level 3 page at +0x1000 and
* four level 2 pages starting at +0x2000 (six pages in total).
*/
/* Initialize Page tables to 0 (6 pages, written as 32-bit words) */
leal pgtable(%ebx), %edi
xorl %eax, %eax
movl $((4096*6)/4), %ecx
rep stosl
/*
* Build Level 4: a single entry pointing at the level 3 page.
* 0x1007 == offset of the next page (0x1000) plus the low flag
* bits 0x7. Only the low 32 bits of each 8-byte entry are written;
* the high half stays zero from the clearing above.
*/
leal pgtable + 0(%ebx), %edi
leal 0x1007 (%edi), %eax
movl %eax, 0(%edi)
/*
* Build Level 3: four consecutive entries, each pointing at one of
* the four level 2 pages (0x1000 apart), with the same 0x7 flags.
*/
leal pgtable + 0x1000(%ebx), %edi
leal 0x1007(%edi), %eax
movl $4, %ecx
1: movl %eax, 0x00(%edi)
addl $0x00001000, %eax
addl $8, %edi
decl %ecx
jnz 1b
/*
* Build Level 2: 2048 entries stepping by 2M, starting at physical
* address 0, i.e. 2048 * 2M == 4G mapped 1:1. 0x183 supplies the
* flag bits of every entry.
*/
leal pgtable + 0x2000(%ebx), %edi
movl $0x00000183, %eax
movl $2048, %ecx
1: movl %eax, 0(%edi)
addl $0x00200000, %eax
addl $8, %edi
decl %ecx
jnz 1b
/* Enable the boot page tables */
leal pgtable(%ebx), %eax
movl %eax, %cr3
/*
* Enable Long mode in EFER (Extended Feature Enable Register):
* read-modify-write so that only the LME bit is changed.
*/
movl $MSR_EFER, %ecx
rdmsr
btsl $_EFER_LME, %eax
wrmsr
/*
* Setup for the jump to 64bit mode
*
* When the jump is performed we will be in long mode but
* in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
* (and in turn EFER.LMA = 1). To jump into 64bit mode we use
* the new gdt/idt that has __KERNEL_CS with CS.L = 1.
* We place all of the values on our mini stack so lret can be
* used to perform that far jump.
*
* The target is the runtime address of startup_64 (link address
* plus the delta in %ebp); lret pops it followed by the selector.
*/
pushl $__KERNEL_CS
leal startup_64(%ebp), %eax
pushl %eax
/* Enter paged protected Mode, activating Long Mode */
movl $(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */
movl %eax, %cr0
/* Jump from 32bit compatibility mode into 64bit mode. */
lret
ENDPROC(startup_32)
/*
 * no_longmode - reached from startup_32 when verify_cpu reports that
 * this is not an x86-64 CPU. There is nothing sensible left to do, so
 * halt; if the CPU is woken up again, simply halt once more.
 */
no_longmode:
	hlt
	jmp	no_longmode
#include "../../kernel/verify_cpu_64.S"
/*
* Be careful here startup_64 needs to be at a predictable
* address so I can export it in an ELF header. Bootloaders
* should look at the ELF header to find this address, as
* it may change in the future.
*/
.code64
.org 0x200
/*
* startup_64 - 64-bit entry point, fixed at offset 0x200 of the image.
*
* Steps performed, in order:
*   1. Load null selectors into the data segment registers and LDTR,
*      and load the task register with selector 0x20.
*   2. Compute the decompression output address into %rbp.
*   3. Compute into %rbx the address the image is moved to.
*   4. Switch to the stack inside the destination image, zero EFLAGS.
*   5. Copy the image, from its start up to _bss, to %rbx.
*   6. Jump to `relocated' inside the copy.
*
* Register roles:
*   %rsi - saved around the copy and left untouched otherwise; the
*          code at `relocated' passes it on as the real mode address
*   %rbp - address the kernel is decompressed to and finally run from
*   %rbx - address of the relocated copy of this image
*/
ENTRY(startup_64)
/*
* We come here either from startup_32 or directly from a
* 64bit bootloader. If we come here from a bootloader we depend on
* an identity mapped page table being provided that maps our
* entire text+data+bss and hopefully all of memory.
*/
/* Setup data segments. */
xorl %eax, %eax
movl %eax, %ds
movl %eax, %es
movl %eax, %ss
movl %eax, %fs
movl %eax, %gs
lldt %ax
/* 0x20 is the offset of the TS descriptor within the boot gdt. */
movl $0x20, %eax
ltr %ax
/*
* Compute the decompressed kernel start address. It is where
* we were loaded at aligned to a 2M boundary. %rbp contains the
* decompressed kernel start address.
*
* If it is a relocatable kernel then decompress and run the kernel
* from load address aligned to 2MB addr, otherwise decompress and
* run the kernel from CONFIG_PHYSICAL_START
*/
/* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
leaq startup_32(%rip) /* - $startup_32 */, %rbp
addq $(PMD_PAGE_SIZE - 1), %rbp
andq $PMD_PAGE_MASK, %rbp
movq %rbp, %rbx
#else
movq $CONFIG_PHYSICAL_START, %rbp
movq %rbp, %rbx
#endif
/*
* Replace the compressed data size with the uncompressed size.
* The 32-bit loads into %eax zero-extend, so %rax can be used in
* the 64-bit arithmetic directly.
*/
movl input_len(%rip), %eax
subq %rax, %rbx
movl output_len(%rip), %eax
addq %rax, %rbx
/*
* Add 8 bytes for every 32K input block
* (8 / 32768 == 1 / 4096, hence the shift by 12).
*/
shrq $12, %rax
addq %rax, %rbx
/* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
addq $(32768 + 18 + 4095), %rbx
andq $~4095, %rbx
/*
* Set up the stack: boot_stack_end as it will be located inside
* the relocated copy at %rbx.
*/
leaq boot_stack_end(%rbx), %rsp
/* Zero EFLAGS */
pushq $0
popfq
/*
* Copy the compressed kernel to the end of our buffer
* where decompression in place becomes safe.
*
* The copy runs backwards (std) in 8-byte units: %rsi and %rdi start
* at the last quadword below _bss in the source and destination
* respectively, and %rcx is the link-time value of _bss divided by 8.
* NOTE(review): using $_bss as the byte count presumes the image is
* linked with startup_32 at address 0, as the inline
* "- $startup_32" remarks suggest - confirm against the linker script.
* NOTE(review): the descending direction is presumably required
* because the destination lies above, and may overlap, the source.
*
* The direction flag is cleared again afterwards, and the caller's
* %rsi is preserved across the copy.
*/
pushq %rsi
leaq (_bss-8)(%rip), %rsi
leaq (_bss-8)(%rbx), %rdi
movq $_bss /* - $startup_32 */, %rcx
shrq $3, %rcx
std
rep movsq
cld
popq %rsi
/*
* Jump to the relocated address: `relocated' as found inside the
* copy at %rbx, not in the image we are currently executing from.
*/
leaq relocated(%rbx), %rax
jmp *%rax
.text
relocated:
/*
* Clear BSS (stack is currently empty)
*/
xorl %eax, %eax
leaq _bss(%rip), %rdi
leaq _ebss(%rip), %rcx
subq %rdi, %rcx
shrq $3, %rcx
rep stosq
/*
* Do the decompression, and jump to the new kernel..
*/
pushq %rsi # Save the real mode argument
movq %rsi, %rdi # real mode address
leaq boot_heap(%rip), %rsi # malloc area for uncompression
leaq input_data(%rip), %rdx # input_data
movl input_len(%rip), %eax
movq %rax, %rcx # input_len
movq %rbp, %r8 # output
call decompress_kernel
popq %rsi
/*
* Jump to the decompressed kernel.
*/
jmp *%rbp
.data
gdt:
.word gdt_end - gdt
.long gdt
.word 0
.quad 0x0000000000000000 /* NULL descriptor */
.quad 0x00af9a000000ffff /* __KERNEL_CS */
.quad 0x00cf92000000ffff /* __KERNEL_DS */
.quad 0x0080890000000000 /* TS descriptor */
.quad 0x0000000000000000 /* TS continued */
gdt_end:
/*
 * Zero-initialised work areas used while uncompressing:
 *   boot_heap   BOOT_HEAP_SIZE bytes, passed to decompress_kernel as
 *               its malloc area
 *   boot_stack  BOOT_STACK_SIZE bytes; boot_stack_end marks the top
 *               and is what the stack pointer is initialised to
 */
	.bss
	.p2align 2
boot_heap:
	.skip	BOOT_HEAP_SIZE
boot_stack:
	.skip	BOOT_STACK_SIZE
boot_stack_end:
/*
 * Room for the boot page tables: six 4K pages, page aligned.
 * They live in a nobits section of their own rather than in .bss, so
 * the BSS clearing code does not touch them; startup_32 zeroes the
 * area itself before filling in the entries.
 */
	.section ".pgtable","a",@nobits
	.p2align 12
pgtable:
	.skip	6*4096