368ff8f14d
We find the runtime address of _stext and relocate ourselves based on the following calculation. virtual_base = ALIGN(KERNELBASE,KERNEL_TLB_PIN_SIZE) + MODULO(_stext.run,KERNEL_TLB_PIN_SIZE) relocate() is called with the Effective Virtual Base Address (as shown below) | Phys. Addr| Virt. Addr | Page |------------------------| Boundary | | | | | | | | | Kernel Load |___________|_ __ _ _ _ _|<- Effective Addr(_stext)| | ^ |Virt. Base Addr | | | | | | | | | |reloc_offset| | | | | | | | | | |______v_____|<-(KERNELBASE)%TLB_SIZE | | | | | | | | | Page |-----------|------------| Boundary | | | On BookE, we need __va() & __pa() early in the boot process to access the device tree. Currently this has been defined as : #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) - PHYSICAL_START + KERNELBASE) where: PHYSICAL_START is kernstart_addr - a variable updated at runtime. KERNELBASE is the compile time Virtual base address of kernel. This won't work for us, as kernstart_addr is dynamic and will yield different results for __va()/__pa() for same mapping. e.g., Let the kernel be loaded at 64MB and KERNELBASE be 0xc0000000 (same as PAGE_OFFSET). In this case, we would be mapping 0 to 0xc0000000, and kernstart_addr = 64M Now __va(1MB) = (0x100000) - (0x4000000) + 0xc0000000 = 0xbc100000 , which is wrong. it should be : 0xc0000000 + 0x100000 = 0xc0100000 On platforms which support AMP, like PPC_47x (based on 44x), the kernel could be loaded at highmem. Hence we cannot always depend on the compile time constants for mapping. Here are the possible solutions: 1) Update kernstart_addr(PHSYICAL_START) to match the Physical address of compile time KERNELBASE value, instead of the actual Physical_Address(_stext). The disadvantage is that we may break other users of PHYSICAL_START. They could be replaced with __pa(_stext). 
2) Redefine __va() & __pa() with relocation offset #ifdef CONFIG_RELOCATABLE_PPC32 #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) - PHYSICAL_START + (KERNELBASE + RELOC_OFFSET))) #define __pa(x) ((unsigned long)(x) + PHYSICAL_START - (KERNELBASE + RELOC_OFFSET)) #endif where, RELOC_OFFSET could be a) A variable, say relocation_offset (like kernstart_addr), updated at boot time. This impacts performance, as we have to load an additional variable from memory. OR b) #define RELOC_OFFSET ((PHYSICAL_START & PPC_PIN_SIZE_OFFSET_MASK) - \ (KERNELBASE & PPC_PIN_SIZE_OFFSET_MASK)) This introduces more calculations for doing the translation. 3) Redefine __va() & __pa() with a new variable i.e., #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET)) where VIRT_PHYS_OFFSET : #ifdef CONFIG_RELOCATABLE_PPC32 #define VIRT_PHYS_OFFSET virt_phys_offset #else #define VIRT_PHYS_OFFSET (KERNELBASE - PHYSICAL_START) #endif /* CONFIG_RELOCATABLE_PPC32 */ where virt_phys_offset is updated at runtime to : Effective KERNELBASE - kernstart_addr. Taking our example, above: virt_phys_offset = effective_kernelstart_vaddr - kernstart_addr = 0xc4000000 - 0x4000000 = 0xc0000000 and __va(0x100000) = 0xc0000000 + 0x100000 = 0xc0100000 which is what we want. I have implemented (3) in the following patch which has same cost of operation as the existing one. I have tested the patches on 440x platforms only. However this should work fine for PPC_47x also, as we only depend on the runtime address and the current TLB XLAT entry for the startup code, which is available in r25. I don't have access to a 47x board yet. So, it would be great if somebody could test this on 47x. Signed-off-by: Suzuki K. Poulose <suzuki@in.ibm.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Kumar Gala <galak@kernel.crashing.org> Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org> Signed-off-by: Josh Boyer <jwboyer@gmail.com>
386 lines
11 KiB
C
386 lines
11 KiB
C
#ifndef _ASM_POWERPC_PAGE_H
|
|
#define _ASM_POWERPC_PAGE_H
|
|
|
|
/*
|
|
* Copyright (C) 2001,2005 IBM Corporation.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
|
|
#ifndef __ASSEMBLY__
|
|
#include <linux/types.h>
|
|
#else
|
|
#include <asm/types.h>
|
|
#endif
|
|
#include <asm/asm-compat.h>
|
|
#include <asm/kdump.h>
|
|
|
|
/*
|
|
* On regular PPC32 page size is 4K (but we support 4K/16K/64K/256K pages
|
|
* on PPC44x). For PPC64 we support either 4K or 64K software
|
|
* page size. When using 64K pages however, whether we are really supporting
|
|
* 64K pages in HW or not is irrelevant to those definitions.
|
|
*/
|
|
/*
 * Pick the base page shift from the configured page size. When none of
 * the larger page-size options is selected, fall back to 4K (shift 12).
 */
#if defined(CONFIG_PPC_256K_PAGES)
#define PAGE_SHIFT		18
#elif defined(CONFIG_PPC_64K_PAGES)
#define PAGE_SHIFT		16
#elif defined(CONFIG_PPC_16K_PAGES)
#define PAGE_SHIFT		14
#else
#define PAGE_SHIFT		12
#endif

/* Size of one page in bytes; ASM_CONST keeps the constant usable from assembly. */
#define PAGE_SIZE		(ASM_CONST(1) << PAGE_SHIFT)
|
|
|
|
#ifndef __ASSEMBLY__
#ifdef CONFIG_HUGETLB_PAGE
/* With hugetlb support the huge page shift is a variable defined elsewhere. */
extern unsigned int HPAGE_SHIFT;
#else
/* Without hugetlb support a "huge" page is simply a base page. */
#define HPAGE_SHIFT PAGE_SHIFT
#endif /* CONFIG_HUGETLB_PAGE */
/* Huge page size in bytes, the mask that strips the in-page offset, and
 * the huge page size expressed as an order of base pages. */
#define HPAGE_SIZE		((1UL) << HPAGE_SHIFT)
#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
#define HUGE_MAX_HSTATE		(MMU_PAGE_COUNT-1)
#endif /* !__ASSEMBLY__ */
|
|
|
|
/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */
#define __HAVE_ARCH_GATE_AREA		1
|
|
|
|
/*
 * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we
 * assign PAGE_MASK to a larger type it gets extended the way we want
 * (i.e. with 1s in the high bits)
 */
#define PAGE_MASK      (~((1 << PAGE_SHIFT) - 1))
|
|
|
|
/*
|
|
* KERNELBASE is the virtual address of the start of the kernel, it's often
|
|
* the same as PAGE_OFFSET, but _might not be_.
|
|
*
|
|
* The kdump dump kernel is one example where KERNELBASE != PAGE_OFFSET.
|
|
*
|
|
* PAGE_OFFSET is the virtual address of the start of lowmem.
|
|
*
|
|
* PHYSICAL_START is the physical address of the start of the kernel.
|
|
*
|
|
* MEMORY_START is the physical address of the start of lowmem.
|
|
*
|
|
* KERNELBASE, PAGE_OFFSET, and PHYSICAL_START are all configurable on
|
|
* ppc32 and based on how they are set we determine MEMORY_START.
|
|
*
|
|
* For the linear mapping the following equation should be true:
|
|
* KERNELBASE - PAGE_OFFSET = PHYSICAL_START - MEMORY_START
|
|
*
|
|
* Also, KERNELBASE >= PAGE_OFFSET and PHYSICAL_START >= MEMORY_START
|
|
*
|
|
* There are two ways to convert between a physical address and a virtual one:
|
|
* va = pa + PAGE_OFFSET - MEMORY_START
|
|
* va = pa + KERNELBASE - PHYSICAL_START
|
|
*
|
|
* If you want to know something's offset from the start of the kernel you
|
|
* should subtract KERNELBASE.
|
|
*
|
|
* If you want to test if something's a kernel address, use is_kernel_addr().
|
|
*/
|
|
|
|
/* Configured (compile-time) virtual address of the start of the kernel. */
#define KERNELBASE      ASM_CONST(CONFIG_KERNEL_START)
/* Configured virtual address of the start of lowmem. */
#define PAGE_OFFSET	ASM_CONST(CONFIG_PAGE_OFFSET)
/* Configured kernel virtual start minus configured physical start. */
#define LOAD_OFFSET	ASM_CONST((CONFIG_KERNEL_START-CONFIG_PHYSICAL_START))
|
|
|
|
/*
 * PHYSICAL_START: for a non-static kernel it reads the kernstart_addr
 * variable; otherwise it is the configured constant.
 */
#if defined(CONFIG_NONSTATIC_KERNEL)
#ifndef __ASSEMBLY__

extern phys_addr_t memstart_addr;
extern phys_addr_t kernstart_addr;

#ifdef CONFIG_RELOCATABLE_PPC32
/* Backs VIRT_PHYS_OFFSET when CONFIG_RELOCATABLE_PPC32 is set. */
extern long long virt_phys_offset;
#endif /* CONFIG_RELOCATABLE_PPC32 */

#endif /* !__ASSEMBLY__ */
#define PHYSICAL_START	kernstart_addr

#else	/* !CONFIG_NONSTATIC_KERNEL */
#define PHYSICAL_START	ASM_CONST(CONFIG_PHYSICAL_START)
#endif /* CONFIG_NONSTATIC_KERNEL */
|
|
|
|
/*
 * VIRT_PHYS_OFFSET is what the Book-E __va() adds to a physical address and
 * what __pa() subtracts from a virtual one; see the description that
 * accompanies those macros further down.
 */
#ifdef CONFIG_RELOCATABLE_PPC32
#define VIRT_PHYS_OFFSET virt_phys_offset
#else
#define VIRT_PHYS_OFFSET (KERNELBASE - PHYSICAL_START)
#endif /* CONFIG_RELOCATABLE_PPC32 */
|
|
|
|
|
|
/*
 * MEMORY_START: constant 0 on PPC64, the memstart_addr variable on a
 * non-static kernel, otherwise derived from the linear-mapping relation
 * KERNELBASE - PAGE_OFFSET = PHYSICAL_START - MEMORY_START.
 */
#ifdef CONFIG_PPC64
#define MEMORY_START	0UL
#elif defined(CONFIG_NONSTATIC_KERNEL)
#define MEMORY_START	memstart_addr
#else
#define MEMORY_START	(PHYSICAL_START + PAGE_OFFSET - KERNELBASE)
#endif
|
|
|
|
#ifdef CONFIG_FLATMEM
/* Page frame number corresponding to MEMORY_START. */
#define ARCH_PFN_OFFSET		((unsigned long)(MEMORY_START >> PAGE_SHIFT))
/* A pfn is valid when it lies in [ARCH_PFN_OFFSET, max_mapnr). */
#define pfn_valid(pfn)		((pfn) >= ARCH_PFN_OFFSET && (pfn) < max_mapnr)
#endif /* CONFIG_FLATMEM */

/* Conversions between kernel virtual addresses, page frame numbers and
 * struct page, all built on __pa()/__va(). */
#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
|
|
|
|
/*
|
|
* On Book-E parts we need __va to parse the device tree and we can't
|
|
* determine MEMORY_START until then. However we can determine PHYSICAL_START
|
|
* from information at hand (program counter, TLB lookup).
|
|
*
|
|
* On BookE with RELOCATABLE (RELOCATABLE_PPC32)
|
|
*
|
|
* With RELOCATABLE_PPC32, we support loading the kernel at any physical
|
|
* address without any restriction on the page alignment.
|
|
*
|
|
* We find the runtime address of _stext and relocate ourselves based on
|
|
* the following calculation:
|
|
*
|
|
* virtual_base = ALIGN_DOWN(KERNELBASE,256M) +
|
|
* MODULO(_stext.run,256M)
|
|
* and create the following mapping:
|
|
*
|
|
* ALIGN_DOWN(_stext.run,256M) => ALIGN_DOWN(KERNELBASE,256M)
|
|
*
|
|
* When we process relocations, we cannot depend on the
|
|
* existing equation for the __va()/__pa() translations:
|
|
*
|
|
* __va(x) = (x) - PHYSICAL_START + KERNELBASE
|
|
*
|
|
* Where:
|
|
* PHYSICAL_START = kernstart_addr = Physical address of _stext
|
|
* KERNELBASE = Compiled virtual address of _stext.
|
|
*
|
|
* This formula holds true iff, kernel load address is TLB page aligned.
|
|
*
|
|
* In our case, we need to also account for the shift in the kernel Virtual
|
|
* address.
|
|
*
|
|
* E.g.,
|
|
*
|
|
* Let the kernel be loaded at 64MB and KERNELBASE be 0xc0000000 (same as PAGE_OFFSET).
|
|
* In this case, we would be mapping 0 to 0xc0000000, and kernstart_addr = 64M
|
|
*
|
|
* Now __va(1MB) = (0x100000) - (0x4000000) + 0xc0000000
|
|
* = 0xbc100000 , which is wrong.
|
|
*
|
|
* Rather, it should be : 0xc0000000 + 0x100000 = 0xc0100000
|
|
* according to our mapping.
|
|
*
|
|
* Hence we use the following formula to get the translations right:
|
|
*
|
|
* __va(x) = (x) - [ PHYSICAL_START - Effective KERNELBASE ]
|
|
*
|
|
* Where :
|
|
* PHYSICAL_START = dynamic load address.(kernstart_addr variable)
|
|
* Effective KERNELBASE = virtual_base =
|
|
* = ALIGN_DOWN(KERNELBASE,256M) +
|
|
* MODULO(PHYSICAL_START,256M)
|
|
*
|
|
* To make the cost of __va() / __pa() more light weight, we introduce
|
|
* a new variable virt_phys_offset, which will hold :
|
|
*
|
|
* virt_phys_offset = Effective KERNELBASE - PHYSICAL_START
|
|
* = ALIGN_DOWN(KERNELBASE,256M) -
|
|
* ALIGN_DOWN(PHYSICAL_START,256M)
|
|
*
|
|
* Hence :
|
|
*
|
|
* __va(x) = x - PHYSICAL_START + Effective KERNELBASE
|
|
* = x + virt_phys_offset
|
|
*
|
|
* and
|
|
* __pa(x) = x + PHYSICAL_START - Effective KERNELBASE
|
|
* = x - virt_phys_offset
|
|
*
|
|
* On non-Book-E PPC64 PAGE_OFFSET and MEMORY_START are constants so use
|
|
* the other definitions for __va & __pa.
|
|
*/
|
|
/*
 * Physical <-> kernel virtual address translation.
 * Book-E translates through VIRT_PHYS_OFFSET; everything else uses
 * PAGE_OFFSET and MEMORY_START.
 */
#ifdef CONFIG_BOOKE
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET))
#define __pa(x) ((unsigned long)(x) - VIRT_PHYS_OFFSET)
#else
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + PAGE_OFFSET - MEMORY_START))
#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET + MEMORY_START)
#endif /* CONFIG_BOOKE */
|
|
|
|
/*
 * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI,
 * and needs to be executable.  This means the whole heap ends
 * up being executable.
 */
#define VM_DATA_DEFAULT_FLAGS32	(VM_READ | VM_WRITE | VM_EXEC | \
				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)

/* The 64-bit variant is the same set of flags without VM_EXEC. */
#define VM_DATA_DEFAULT_FLAGS64	(VM_READ | VM_WRITE | \
				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
|
|
|
|
#ifdef __powerpc64__
|
|
#include <asm/page_64.h>
|
|
#else
|
|
#include <asm/page_32.h>
|
|
#endif
|
|
|
|
/*
 * Align addr on a size boundary - adjust address up/down if needed.
 * The mask arithmetic only gives a correct result when size is a power
 * of two. Both arguments are evaluated more than once.
 */
#define _ALIGN_UP(addr,size)	(((addr)+((size)-1))&(~((size)-1)))
#define _ALIGN_DOWN(addr,size)	((addr)&(~((size)-1)))

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN(addr,size)     _ALIGN_UP(addr,size)
|
|
|
|
/*
 * Don't compare things with KERNELBASE or PAGE_OFFSET to test for
 * "kernelness", use is_kernel_addr() - it should do what you want.
 *
 * On 64-bit Book3E the test is against a fixed boundary (top bit set);
 * everywhere else it is against PAGE_OFFSET.
 */
#ifdef CONFIG_PPC_BOOK3E_64
#define is_kernel_addr(x)	((x) >= 0x8000000000000000ul)
#else
#define is_kernel_addr(x)	((x) >= PAGE_OFFSET)
#endif /* CONFIG_PPC_BOOK3E_64 */
|
|
|
|
/*
 * Use the top bit of the higher-level page table entries to indicate whether
 * the entries we point to contain hugepages.  This works because we know that
 * the page tables live in kernel space.  If we ever decide to support having
 * page tables at arbitrary addresses, this breaks and will have to change.
 */
#ifdef CONFIG_PPC64
#define PD_HUGE 0x8000000000000000
#else
#define PD_HUGE 0x80000000
#endif /* CONFIG_PPC64 */
|
|
|
|
/*
 * Some number of bits at the level of the page table that points to
 * a hugepte are used to encode the size.  This masks those bits.
 */
#define HUGEPD_SHIFT_MASK     0x3f
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#undef STRICT_MM_TYPECHECKS
|
|
|
|
#ifdef STRICT_MM_TYPECHECKS
|
|
/* These are used to make use of C type-checking. */
|
|
|
|
/* PTE level */
|
|
typedef struct { pte_basic_t pte; } pte_t;
|
|
#define pte_val(x) ((x).pte)
|
|
#define __pte(x) ((pte_t) { (x) })
|
|
|
|
/* 64k pages additionally define a bigger "real PTE" type that gathers
|
|
* the "second half" part of the PTE for pseudo 64k pages
|
|
*/
|
|
#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
|
|
typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
|
|
#else
|
|
typedef struct { pte_t pte; } real_pte_t;
|
|
#endif
|
|
|
|
/* PMD level */
|
|
#ifdef CONFIG_PPC64
|
|
typedef struct { unsigned long pmd; } pmd_t;
|
|
#define pmd_val(x) ((x).pmd)
|
|
#define __pmd(x) ((pmd_t) { (x) })
|
|
|
|
/* PUD level exusts only on 4k pages */
|
|
#ifndef CONFIG_PPC_64K_PAGES
|
|
typedef struct { unsigned long pud; } pud_t;
|
|
#define pud_val(x) ((x).pud)
|
|
#define __pud(x) ((pud_t) { (x) })
|
|
#endif /* !CONFIG_PPC_64K_PAGES */
|
|
#endif /* CONFIG_PPC64 */
|
|
|
|
/* PGD level */
|
|
typedef struct { unsigned long pgd; } pgd_t;
|
|
#define pgd_val(x) ((x).pgd)
|
|
#define __pgd(x) ((pgd_t) { (x) })
|
|
|
|
/* Page protection bits */
|
|
typedef struct { unsigned long pgprot; } pgprot_t;
|
|
#define pgprot_val(x) ((x).pgprot)
|
|
#define __pgprot(x) ((pgprot_t) { (x) })
|
|
|
|
#else
|
|
|
|
/*
|
|
* .. while these make it easier on the compiler
|
|
*/
|
|
|
|
typedef pte_basic_t pte_t;
|
|
#define pte_val(x) (x)
|
|
#define __pte(x) (x)
|
|
|
|
#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
|
|
typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
|
|
#else
|
|
typedef pte_t real_pte_t;
|
|
#endif
|
|
|
|
|
|
#ifdef CONFIG_PPC64
|
|
typedef unsigned long pmd_t;
|
|
#define pmd_val(x) (x)
|
|
#define __pmd(x) (x)
|
|
|
|
#ifndef CONFIG_PPC_64K_PAGES
|
|
typedef unsigned long pud_t;
|
|
#define pud_val(x) (x)
|
|
#define __pud(x) (x)
|
|
#endif /* !CONFIG_PPC_64K_PAGES */
|
|
#endif /* CONFIG_PPC64 */
|
|
|
|
typedef unsigned long pgd_t;
|
|
#define pgd_val(x) (x)
|
|
#define pgprot_val(x) (x)
|
|
|
|
typedef unsigned long pgprot_t;
|
|
#define __pgd(x) (x)
|
|
#define __pgprot(x) (x)
|
|
|
|
#endif
|
|
|
|
/* Huge page directory entry; pd is deliberately signed so its top bit reads as the sign. */
typedef struct { signed long pd; } hugepd_t;

#ifdef CONFIG_HUGETLB_PAGE
/*
 * Return non-zero when hpd.pd is strictly positive, i.e. the entry is
 * non-zero and its top (sign) bit is clear.
 */
static inline int hugepd_ok(hugepd_t hpd)
{
	return (hpd.pd > 0);
}

/* Reinterpret the entry pointed to by pdep as a hugepd_t and test it. */
#define is_hugepd(pdep)               (hugepd_ok(*((hugepd_t *)(pdep))))
#else /* CONFIG_HUGETLB_PAGE */
/* Without hugetlb support no entry is ever a hugepd. */
#define is_hugepd(pdep)			0
#endif /* CONFIG_HUGETLB_PAGE */
|
|
|
|
/* Forward declaration: only pointers to struct page are needed here. */
struct page;

/*
 * User-page helpers and pfn queries implemented elsewhere.
 * NOTE(review): their exact semantics are not visible from this header;
 * consult the definitions before relying on more than the signatures.
 */
extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
extern void copy_user_page(void *to, void *from, unsigned long vaddr,
		struct page *p);
extern int page_is_ram(unsigned long pfn);
extern int devmem_is_allowed(unsigned long pfn);
|
|
|
|
#ifdef CONFIG_PPC_SMLPAR
/*
 * Architecture hook declared only when CONFIG_PPC_SMLPAR is set;
 * HAVE_ARCH_FREE_PAGE advertises that this declaration exists.
 */
void arch_free_page(struct page *page, int order);
#define HAVE_ARCH_FREE_PAGE
#endif /* CONFIG_PPC_SMLPAR */
|
|
|
|
/* Forward declaration so later prototypes can take pointers to it. */
struct vm_area_struct;

/* A page table is referred to through a struct page pointer. */
typedef struct page *pgtable_t;
|
|
|
|
#include <asm-generic/memory_model.h>
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
#endif /* _ASM_POWERPC_PAGE_H */
|