Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Ingo Molnar: "The main x86 MM changes in this cycle were: - continued native kernel PCID support preparation patches to the TLB flushing code (Andy Lutomirski) - various fixes related to 32-bit compat syscall returning address over 4Gb in applications, launched from 64-bit binaries - motivated by C/R frameworks such as Virtuozzo. (Dmitry Safonov) - continued Intel 5-level paging enablement: in particular the conversion of x86 GUP to the generic GUP code. (Kirill A. Shutemov) - x86/mpx ABI corner case fixes/enhancements (Joerg Roedel) - ... plus misc updates, fixes and cleanups" * 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (62 commits) mm, zone_device: Replace {get, put}_zone_device_page() with a single reference to fix pmem crash x86/mm: Fix flush_tlb_page() on Xen x86/mm: Make flush_tlb_mm_range() more predictable x86/mm: Remove flush_tlb() and flush_tlb_current_task() x86/vm86/32: Switch to flush_tlb_mm_range() in mark_screen_rdonly() x86/mm/64: Fix crash in remove_pagetable() Revert "x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation" x86/boot/e820: Remove a redundant self assignment x86/mm: Fix dump pagetables for 4 levels of page tables x86/mpx, selftests: Only check bounds-vs-shadow when we keep shadow x86/mpx: Correctly report do_mpx_bt_fault() failures to user-space Revert "x86/mm/numa: Remove numa_nodemask_from_meminfo()" x86/espfix: Add support for 5-level paging x86/kasan: Extend KASAN to support 5-level paging x86/mm: Add basic defines/helpers for CONFIG_X86_5LEVEL=y x86/paravirt: Add 5-level support to the paravirt code x86/mm: Define virtual memory map for 5-level paging x86/asm: Remove __VIRTUAL_MASK_SHIFT==47 assert x86/boot: Detect 5-level paging support x86/mm/numa: Remove numa_nodemask_from_meminfo() ...
This commit is contained in:
commit
d3b5d35290
93 changed files with 1851 additions and 717 deletions
|
@ -4,7 +4,7 @@
|
|||
Virtual memory map with 4 level page tables:
|
||||
|
||||
0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
|
||||
hole caused by [48:63] sign extension
|
||||
hole caused by [47:63] sign extension
|
||||
ffff800000000000 - ffff87ffffffffff (=43 bits) guard hole, reserved for hypervisor
|
||||
ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory
|
||||
ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole
|
||||
|
@ -19,16 +19,43 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
|
|||
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
|
||||
... unused hole ...
|
||||
ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
|
||||
ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
|
||||
ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
|
||||
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
|
||||
|
||||
Virtual memory map with 5 level page tables:
|
||||
|
||||
0000000000000000 - 00ffffffffffffff (=56 bits) user space, different per mm
|
||||
hole caused by [56:63] sign extension
|
||||
ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
|
||||
ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
|
||||
ff90000000000000 - ff91ffffffffffff (=49 bits) hole
|
||||
ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
|
||||
ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
|
||||
ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
|
||||
... unused hole ...
|
||||
ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB)
|
||||
... unused hole ...
|
||||
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
|
||||
... unused hole ...
|
||||
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
|
||||
... unused hole ...
|
||||
ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
|
||||
ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
|
||||
ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
|
||||
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
|
||||
|
||||
Architecture defines a 64-bit virtual address. Implementations can support
|
||||
less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
|
||||
through to the most-significant implemented bit are set to either all ones
|
||||
or all zeros. This causes a hole between user space and kernel addresses.
|
||||
|
||||
The direct mapping covers all memory in the system up to the highest
|
||||
memory address (this means in some cases it can also include PCI memory
|
||||
holes).
|
||||
|
||||
vmalloc space is lazily synchronized into the different PML4 pages of
|
||||
the processes using the page fault handler, with init_level4_pgt as
|
||||
vmalloc space is lazily synchronized into the different PML4/PML5 pages of
|
||||
the processes using the page fault handler, with init_top_pgt as
|
||||
reference.
|
||||
|
||||
Current X86-64 implementations support up to 46 bits of address space (64 TB),
|
||||
|
@ -39,6 +66,9 @@ memory window (this size is arbitrary, it can be raised later if needed).
|
|||
The mappings are not part of any other kernel PGD and are only available
|
||||
during EFI runtime calls.
|
||||
|
||||
The module mapping space size changes based on the CONFIG requirements for the
|
||||
following fixmap section.
|
||||
|
||||
Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
|
||||
physical memory, vmalloc/ioremap space and virtual memory map are randomized.
|
||||
Their order is preserved but their base will be offset early at boot time.
|
||||
|
|
|
@ -700,6 +700,13 @@ config ARCH_MMAP_RND_COMPAT_BITS
|
|||
This value can be changed after boot using the
|
||||
/proc/sys/vm/mmap_rnd_compat_bits tunable
|
||||
|
||||
config HAVE_ARCH_COMPAT_MMAP_BASES
|
||||
bool
|
||||
help
|
||||
This allows 64-bit applications to invoke the 32-bit mmap() syscall
|
||||
and, vice versa, 32-bit applications to call the 64-bit mmap().
|
||||
Required for applications that issue syscalls of a different bitness.
|
||||
|
||||
config HAVE_COPY_THREAD_TLS
|
||||
bool
|
||||
help
|
||||
|
|
|
@ -163,11 +163,5 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
|
|||
/* by default, allow everything */
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool arch_pte_access_permitted(pte_t pte, bool write)
|
||||
{
|
||||
/* by default, allow everything */
|
||||
return true;
|
||||
}
|
||||
#endif /* __KERNEL__ */
|
||||
#endif /* __ASM_POWERPC_MMU_CONTEXT_H */
|
||||
|
|
|
@ -156,10 +156,4 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
|
|||
/* by default, allow everything */
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool arch_pte_access_permitted(pte_t pte, bool write)
|
||||
{
|
||||
/* by default, allow everything */
|
||||
return true;
|
||||
}
|
||||
#endif /* __S390_MMU_CONTEXT_H */
|
||||
|
|
|
@ -37,12 +37,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
|
|||
return true;
|
||||
}
|
||||
|
||||
static inline bool arch_pte_access_permitted(pte_t pte, bool write)
|
||||
{
|
||||
/* by default, allow everything */
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* end asm-generic/mm_hooks.h functions
|
||||
*/
|
||||
|
|
|
@ -103,10 +103,4 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
|
|||
/* by default, allow everything */
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool arch_pte_access_permitted(pte_t pte, bool write)
|
||||
{
|
||||
/* by default, allow everything */
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -105,6 +105,7 @@ config X86
|
|||
select HAVE_ARCH_KMEMCHECK
|
||||
select HAVE_ARCH_MMAP_RND_BITS if MMU
|
||||
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
|
||||
select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT
|
||||
select HAVE_ARCH_SECCOMP_FILTER
|
||||
select HAVE_ARCH_TRACEHOOK
|
||||
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
|
||||
|
@ -289,6 +290,7 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
|
|||
config KASAN_SHADOW_OFFSET
|
||||
hex
|
||||
depends on KASAN
|
||||
default 0xdff8000000000000 if X86_5LEVEL
|
||||
default 0xdffffc0000000000
|
||||
|
||||
config HAVE_INTEL_TXT
|
||||
|
|
|
@ -44,6 +44,15 @@ static const u32 req_flags[NCAPINTS] =
|
|||
0, /* REQUIRED_MASK5 not implemented in this file */
|
||||
REQUIRED_MASK6,
|
||||
0, /* REQUIRED_MASK7 not implemented in this file */
|
||||
0, /* REQUIRED_MASK8 not implemented in this file */
|
||||
0, /* REQUIRED_MASK9 not implemented in this file */
|
||||
0, /* REQUIRED_MASK10 not implemented in this file */
|
||||
0, /* REQUIRED_MASK11 not implemented in this file */
|
||||
0, /* REQUIRED_MASK12 not implemented in this file */
|
||||
0, /* REQUIRED_MASK13 not implemented in this file */
|
||||
0, /* REQUIRED_MASK14 not implemented in this file */
|
||||
0, /* REQUIRED_MASK15 not implemented in this file */
|
||||
REQUIRED_MASK16,
|
||||
};
|
||||
|
||||
#define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a))
|
||||
|
|
|
@ -70,16 +70,19 @@ int has_eflag(unsigned long mask)
|
|||
# define EBX_REG "=b"
|
||||
#endif
|
||||
|
||||
static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d)
|
||||
static inline void cpuid_count(u32 id, u32 count,
|
||||
u32 *a, u32 *b, u32 *c, u32 *d)
|
||||
{
|
||||
asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif \n\t"
|
||||
"cpuid \n\t"
|
||||
".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif \n\t"
|
||||
: "=a" (*a), "=c" (*c), "=d" (*d), EBX_REG (*b)
|
||||
: "a" (id)
|
||||
: "a" (id), "c" (count)
|
||||
);
|
||||
}
|
||||
|
||||
#define cpuid(id, a, b, c, d) cpuid_count(id, 0, a, b, c, d)
|
||||
|
||||
void get_cpuflags(void)
|
||||
{
|
||||
u32 max_intel_level, max_amd_level;
|
||||
|
@ -108,6 +111,11 @@ void get_cpuflags(void)
|
|||
cpu.model += ((tfms >> 16) & 0xf) << 4;
|
||||
}
|
||||
|
||||
if (max_intel_level >= 0x00000007) {
|
||||
cpuid_count(0x00000007, 0, &ignored, &ignored,
|
||||
&cpu.flags[16], &ignored);
|
||||
}
|
||||
|
||||
cpuid(0x80000000, &max_amd_level, &ignored, &ignored,
|
||||
&ignored);
|
||||
|
||||
|
|
|
@ -265,12 +265,9 @@ return_from_SYSCALL_64:
|
|||
*
|
||||
* If width of "canonical tail" ever becomes variable, this will need
|
||||
* to be updated to remain correct on both old and new CPUs.
|
||||
*
|
||||
* Change top 16 bits to be the sign-extension of 47th bit
|
||||
*/
|
||||
.ifne __VIRTUAL_MASK_SHIFT - 47
|
||||
.error "virtual address width changed -- SYSRET checks need update"
|
||||
.endif
|
||||
|
||||
/* Change top 16 bits to be the sign-extension of 47th bit */
|
||||
shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
|
||||
sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
|
||||
|
||||
|
|
|
@ -361,7 +361,7 @@ static void vgetcpu_cpu_init(void *arg)
|
|||
d.p = 1; /* Present */
|
||||
d.d = 1; /* 32-bit */
|
||||
|
||||
write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
|
||||
write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
|
||||
}
|
||||
|
||||
static int vgetcpu_online(unsigned int cpu)
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include <asm/desc_defs.h>
|
||||
#include <asm/ldt.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/fixmap.h>
|
||||
|
||||
#include <linux/smp.h>
|
||||
#include <linux/percpu.h>
|
||||
|
@ -45,11 +46,43 @@ struct gdt_page {
|
|||
|
||||
DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
|
||||
|
||||
static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
|
||||
/* Provide the original GDT */
|
||||
static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
|
||||
{
|
||||
return per_cpu(gdt_page, cpu).gdt;
|
||||
}
|
||||
|
||||
/* Provide the original (writable) GDT of the current CPU */
|
||||
static inline struct desc_struct *get_current_gdt_rw(void)
|
||||
{
|
||||
return this_cpu_ptr(&gdt_page)->gdt;
|
||||
}
|
||||
|
||||
/* Get the fixmap index for a specific processor */
|
||||
static inline unsigned int get_cpu_gdt_ro_index(int cpu)
|
||||
{
|
||||
return FIX_GDT_REMAP_BEGIN + cpu;
|
||||
}
|
||||
|
||||
/* Provide the fixmap address of the remapped GDT */
|
||||
static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
|
||||
{
|
||||
unsigned int idx = get_cpu_gdt_ro_index(cpu);
|
||||
return (struct desc_struct *)__fix_to_virt(idx);
|
||||
}
|
||||
|
||||
/* Provide the current read-only GDT */
|
||||
static inline struct desc_struct *get_current_gdt_ro(void)
|
||||
{
|
||||
return get_cpu_gdt_ro(smp_processor_id());
|
||||
}
|
||||
|
||||
/* Provide the physical address of the GDT page. */
|
||||
static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
|
||||
{
|
||||
return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
|
||||
|
@ -174,7 +207,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned t
|
|||
|
||||
static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
|
||||
{
|
||||
struct desc_struct *d = get_cpu_gdt_table(cpu);
|
||||
struct desc_struct *d = get_cpu_gdt_rw(cpu);
|
||||
tss_desc tss;
|
||||
|
||||
set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
|
||||
|
@ -194,22 +227,90 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
|
|||
|
||||
set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
|
||||
entries * LDT_ENTRY_SIZE - 1);
|
||||
write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
|
||||
write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_LDT,
|
||||
&ldt, DESC_LDT);
|
||||
asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
|
||||
}
|
||||
}
|
||||
|
||||
static inline void native_load_gdt(const struct desc_ptr *dtr)
|
||||
{
|
||||
asm volatile("lgdt %0"::"m" (*dtr));
|
||||
}
|
||||
|
||||
static inline void native_load_idt(const struct desc_ptr *dtr)
|
||||
{
|
||||
asm volatile("lidt %0"::"m" (*dtr));
|
||||
}
|
||||
|
||||
static inline void native_store_gdt(struct desc_ptr *dtr)
|
||||
{
|
||||
asm volatile("sgdt %0":"=m" (*dtr));
|
||||
}
|
||||
|
||||
static inline void native_store_idt(struct desc_ptr *dtr)
|
||||
{
|
||||
asm volatile("sidt %0":"=m" (*dtr));
|
||||
}
|
||||
|
||||
/*
|
||||
* The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is
|
||||
* a read-only remapping. To prevent a page fault, the GDT is switched to the
|
||||
* original writeable version when needed.
|
||||
*/
|
||||
#ifdef CONFIG_X86_64
|
||||
static inline void native_load_tr_desc(void)
|
||||
{
|
||||
struct desc_ptr gdt;
|
||||
int cpu = raw_smp_processor_id();
|
||||
bool restore = 0;
|
||||
struct desc_struct *fixmap_gdt;
|
||||
|
||||
native_store_gdt(&gdt);
|
||||
fixmap_gdt = get_cpu_gdt_ro(cpu);
|
||||
|
||||
/*
|
||||
* If the current GDT is the read-only fixmap, swap to the original
|
||||
* writeable version. Swap back at the end.
|
||||
*/
|
||||
if (gdt.address == (unsigned long)fixmap_gdt) {
|
||||
load_direct_gdt(cpu);
|
||||
restore = 1;
|
||||
}
|
||||
asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
|
||||
if (restore)
|
||||
load_fixmap_gdt(cpu);
|
||||
}
|
||||
#else
|
||||
static inline void native_load_tr_desc(void)
|
||||
{
|
||||
asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline unsigned long native_store_tr(void)
|
||||
{
|
||||
unsigned long tr;
|
||||
|
||||
asm volatile("str %0":"=r" (tr));
|
||||
|
||||
return tr;
|
||||
}
|
||||
|
||||
static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
|
||||
{
|
||||
struct desc_struct *gdt = get_cpu_gdt_rw(cpu);
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
|
||||
gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
|
||||
}
|
||||
|
||||
DECLARE_PER_CPU(bool, __tss_limit_invalid);
|
||||
|
||||
static inline void force_reload_TR(void)
|
||||
{
|
||||
struct desc_struct *d = get_cpu_gdt_table(smp_processor_id());
|
||||
struct desc_struct *d = get_current_gdt_rw();
|
||||
tss_desc tss;
|
||||
|
||||
memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc));
|
||||
|
@ -257,44 +358,6 @@ static inline void invalidate_tss_limit(void)
|
|||
this_cpu_write(__tss_limit_invalid, true);
|
||||
}
|
||||
|
||||
static inline void native_load_gdt(const struct desc_ptr *dtr)
|
||||
{
|
||||
asm volatile("lgdt %0"::"m" (*dtr));
|
||||
}
|
||||
|
||||
static inline void native_load_idt(const struct desc_ptr *dtr)
|
||||
{
|
||||
asm volatile("lidt %0"::"m" (*dtr));
|
||||
}
|
||||
|
||||
static inline void native_store_gdt(struct desc_ptr *dtr)
|
||||
{
|
||||
asm volatile("sgdt %0":"=m" (*dtr));
|
||||
}
|
||||
|
||||
static inline void native_store_idt(struct desc_ptr *dtr)
|
||||
{
|
||||
asm volatile("sidt %0":"=m" (*dtr));
|
||||
}
|
||||
|
||||
static inline unsigned long native_store_tr(void)
|
||||
{
|
||||
unsigned long tr;
|
||||
|
||||
asm volatile("str %0":"=r" (tr));
|
||||
|
||||
return tr;
|
||||
}
|
||||
|
||||
static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
|
||||
{
|
||||
struct desc_struct *gdt = get_cpu_gdt_table(cpu);
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
|
||||
gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
|
||||
}
|
||||
|
||||
/* This intentionally ignores lm, since 32-bit apps don't have that field. */
|
||||
#define LDT_empty(info) \
|
||||
((info)->base_addr == 0 && \
|
||||
|
|
|
@ -36,6 +36,12 @@
|
|||
# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31))
|
||||
#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
# define DISABLE_LA57 0
|
||||
#else
|
||||
# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Make sure to add features to the correct mask
|
||||
*/
|
||||
|
@ -55,7 +61,7 @@
|
|||
#define DISABLED_MASK13 0
|
||||
#define DISABLED_MASK14 0
|
||||
#define DISABLED_MASK15 0
|
||||
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
|
||||
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57)
|
||||
#define DISABLED_MASK17 0
|
||||
#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
|
||||
|
||||
|
|
|
@ -293,8 +293,23 @@ do { \
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* True on X86_32 or when emulating IA32 on X86_64
|
||||
*/
|
||||
static inline int mmap_is_ia32(void)
|
||||
{
|
||||
return IS_ENABLED(CONFIG_X86_32) ||
|
||||
(IS_ENABLED(CONFIG_COMPAT) &&
|
||||
test_thread_flag(TIF_ADDR32));
|
||||
}
|
||||
|
||||
extern unsigned long tasksize_32bit(void);
|
||||
extern unsigned long tasksize_64bit(void);
|
||||
extern unsigned long get_mmap_base(int is_legacy);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
|
||||
#define __STACK_RND_MASK(is32bit) (0x7ff)
|
||||
#define STACK_RND_MASK (0x7ff)
|
||||
|
||||
#define ARCH_DLINFO ARCH_DLINFO_IA32
|
||||
|
@ -304,7 +319,8 @@ do { \
|
|||
#else /* CONFIG_X86_32 */
|
||||
|
||||
/* 1GB for 64bit, 8MB for 32bit */
|
||||
#define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff)
|
||||
#define __STACK_RND_MASK(is32bit) ((is32bit) ? 0x7ff : 0x3fffff)
|
||||
#define STACK_RND_MASK __STACK_RND_MASK(mmap_is_ia32())
|
||||
|
||||
#define ARCH_DLINFO \
|
||||
do { \
|
||||
|
@ -348,16 +364,6 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
|
|||
int uses_interp);
|
||||
#define compat_arch_setup_additional_pages compat_arch_setup_additional_pages
|
||||
|
||||
/*
|
||||
* True on X86_32 or when emulating IA32 on X86_64
|
||||
*/
|
||||
static inline int mmap_is_ia32(void)
|
||||
{
|
||||
return IS_ENABLED(CONFIG_X86_32) ||
|
||||
(IS_ENABLED(CONFIG_COMPAT) &&
|
||||
test_thread_flag(TIF_ADDR32));
|
||||
}
|
||||
|
||||
/* Do not change the values. See get_align_mask() */
|
||||
enum align_flags {
|
||||
ALIGN_VA_32 = BIT(0),
|
||||
|
|
|
@ -100,6 +100,10 @@ enum fixed_addresses {
|
|||
#ifdef CONFIG_X86_INTEL_MID
|
||||
FIX_LNW_VRTC,
|
||||
#endif
|
||||
/* Fixmap entries to remap the GDTs, one per processor. */
|
||||
FIX_GDT_REMAP_BEGIN,
|
||||
FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
|
||||
|
||||
__end_of_permanent_fixed_addresses,
|
||||
|
||||
/*
|
||||
|
|
|
@ -11,9 +11,12 @@
|
|||
* 'kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT
|
||||
*/
|
||||
#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \
|
||||
(0xffff800000000000ULL >> 3))
|
||||
/* 47 bits for kernel address -> (47 - 3) bits for shadow */
|
||||
#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1ULL << (47 - 3)))
|
||||
((-1UL << __VIRTUAL_MASK_SHIFT) >> 3))
|
||||
/*
|
||||
* 47 bits for kernel address -> (47 - 3) bits for shadow
|
||||
* 56 bits for kernel address -> (56 - 3) bits for shadow
|
||||
*/
|
||||
#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1ULL << (__VIRTUAL_MASK_SHIFT - 3)))
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
|
|
|
@ -164,6 +164,7 @@ struct kimage_arch {
|
|||
};
|
||||
#else
|
||||
struct kimage_arch {
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
|
|
@ -268,8 +268,4 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
|
|||
return __pkru_allows_pkey(vma_pkey(vma), write);
|
||||
}
|
||||
|
||||
static inline bool arch_pte_access_permitted(pte_t pte, bool write)
|
||||
{
|
||||
return __pkru_allows_pkey(pte_flags_pkey(pte_flags(pte)), write);
|
||||
}
|
||||
#endif /* _ASM_X86_MMU_CONTEXT_H */
|
||||
|
|
|
@ -36,7 +36,12 @@
|
|||
* hypervisor to fit. Choosing 16 slots here is arbitrary, but it's
|
||||
* what Xen requires.
|
||||
*/
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
#define __PAGE_OFFSET_BASE _AC(0xff10000000000000, UL)
|
||||
#else
|
||||
#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_RANDOMIZE_MEMORY
|
||||
#define __PAGE_OFFSET page_offset_base
|
||||
#else
|
||||
|
@ -46,8 +51,13 @@
|
|||
#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
|
||||
|
||||
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
#define __PHYSICAL_MASK_SHIFT 52
|
||||
#define __VIRTUAL_MASK_SHIFT 56
|
||||
#else
|
||||
#define __PHYSICAL_MASK_SHIFT 46
|
||||
#define __VIRTUAL_MASK_SHIFT 47
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Kernel image size is limited to 1GiB due to the fixmap living in the
|
||||
|
|
|
@ -357,6 +357,16 @@ static inline void paravirt_release_pud(unsigned long pfn)
|
|||
PVOP_VCALL1(pv_mmu_ops.release_pud, pfn);
|
||||
}
|
||||
|
||||
static inline void paravirt_alloc_p4d(struct mm_struct *mm, unsigned long pfn)
|
||||
{
|
||||
PVOP_VCALL2(pv_mmu_ops.alloc_p4d, mm, pfn);
|
||||
}
|
||||
|
||||
static inline void paravirt_release_p4d(unsigned long pfn)
|
||||
{
|
||||
PVOP_VCALL1(pv_mmu_ops.release_p4d, pfn);
|
||||
}
|
||||
|
||||
static inline void pte_update(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep)
|
||||
{
|
||||
|
@ -536,7 +546,7 @@ static inline void set_pud(pud_t *pudp, pud_t pud)
|
|||
PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
|
||||
val);
|
||||
}
|
||||
#if CONFIG_PGTABLE_LEVELS == 4
|
||||
#if CONFIG_PGTABLE_LEVELS >= 4
|
||||
static inline pud_t __pud(pudval_t val)
|
||||
{
|
||||
pudval_t ret;
|
||||
|
@ -565,16 +575,42 @@ static inline pudval_t pud_val(pud_t pud)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static inline void pud_clear(pud_t *pudp)
|
||||
{
|
||||
set_pud(pudp, __pud(0));
|
||||
}
|
||||
|
||||
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
|
||||
{
|
||||
p4dval_t val = native_p4d_val(p4d);
|
||||
|
||||
if (sizeof(p4dval_t) > sizeof(long))
|
||||
PVOP_VCALL3(pv_mmu_ops.set_p4d, p4dp,
|
||||
val, (u64)val >> 32);
|
||||
else
|
||||
PVOP_VCALL2(pv_mmu_ops.set_p4d, p4dp,
|
||||
val);
|
||||
}
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS >= 5
|
||||
|
||||
static inline p4d_t __p4d(p4dval_t val)
|
||||
{
|
||||
p4dval_t ret = PVOP_CALLEE1(p4dval_t, pv_mmu_ops.make_p4d, val);
|
||||
|
||||
return (p4d_t) { ret };
|
||||
}
|
||||
|
||||
static inline p4dval_t p4d_val(p4d_t p4d)
|
||||
{
|
||||
return PVOP_CALLEE1(p4dval_t, pv_mmu_ops.p4d_val, p4d.p4d);
|
||||
}
|
||||
|
||||
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
|
||||
{
|
||||
pgdval_t val = native_pgd_val(pgd);
|
||||
|
||||
if (sizeof(pgdval_t) > sizeof(long))
|
||||
PVOP_VCALL3(pv_mmu_ops.set_pgd, pgdp,
|
||||
val, (u64)val >> 32);
|
||||
else
|
||||
PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp,
|
||||
val);
|
||||
PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, val);
|
||||
}
|
||||
|
||||
static inline void pgd_clear(pgd_t *pgdp)
|
||||
|
@ -582,9 +618,11 @@ static inline void pgd_clear(pgd_t *pgdp)
|
|||
set_pgd(pgdp, __pgd(0));
|
||||
}
|
||||
|
||||
static inline void pud_clear(pud_t *pudp)
|
||||
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
|
||||
|
||||
static inline void p4d_clear(p4d_t *p4dp)
|
||||
{
|
||||
set_pud(pudp, __pud(0));
|
||||
set_p4d(p4dp, __p4d(0));
|
||||
}
|
||||
|
||||
#endif /* CONFIG_PGTABLE_LEVELS >= 4 */
|
||||
|
|
|
@ -238,9 +238,11 @@ struct pv_mmu_ops {
|
|||
void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn);
|
||||
void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn);
|
||||
void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn);
|
||||
void (*alloc_p4d)(struct mm_struct *mm, unsigned long pfn);
|
||||
void (*release_pte)(unsigned long pfn);
|
||||
void (*release_pmd)(unsigned long pfn);
|
||||
void (*release_pud)(unsigned long pfn);
|
||||
void (*release_p4d)(unsigned long pfn);
|
||||
|
||||
/* Pagetable manipulation functions */
|
||||
void (*set_pte)(pte_t *ptep, pte_t pteval);
|
||||
|
@ -279,12 +281,21 @@ struct pv_mmu_ops {
|
|||
struct paravirt_callee_save pmd_val;
|
||||
struct paravirt_callee_save make_pmd;
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS == 4
|
||||
#if CONFIG_PGTABLE_LEVELS >= 4
|
||||
struct paravirt_callee_save pud_val;
|
||||
struct paravirt_callee_save make_pud;
|
||||
|
||||
void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
|
||||
#endif /* CONFIG_PGTABLE_LEVELS == 4 */
|
||||
void (*set_p4d)(p4d_t *p4dp, p4d_t p4dval);
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS >= 5
|
||||
struct paravirt_callee_save p4d_val;
|
||||
struct paravirt_callee_save make_p4d;
|
||||
|
||||
void (*set_pgd)(pgd_t *pgdp, pgd_t pgdval);
|
||||
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
|
||||
|
||||
#endif /* CONFIG_PGTABLE_LEVELS >= 4 */
|
||||
|
||||
#endif /* CONFIG_PGTABLE_LEVELS >= 3 */
|
||||
|
||||
struct pv_lazy_ops lazy_mode;
|
||||
|
|
|
@ -17,9 +17,11 @@ static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {
|
|||
static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
|
||||
unsigned long start, unsigned long count) {}
|
||||
static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) {}
|
||||
static inline void paravirt_alloc_p4d(struct mm_struct *mm, unsigned long pfn) {}
|
||||
static inline void paravirt_release_pte(unsigned long pfn) {}
|
||||
static inline void paravirt_release_pmd(unsigned long pfn) {}
|
||||
static inline void paravirt_release_pud(unsigned long pfn) {}
|
||||
static inline void paravirt_release_p4d(unsigned long pfn) {}
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -121,10 +123,10 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
|
|||
#endif /* CONFIG_X86_PAE */
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 3
|
||||
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
|
||||
static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
|
||||
{
|
||||
paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
|
||||
set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
|
||||
set_p4d(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
|
||||
}
|
||||
|
||||
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
|
||||
|
@ -150,6 +152,37 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
|
|||
___pud_free_tlb(tlb, pud);
|
||||
}
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 4
|
||||
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
|
||||
{
|
||||
paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT);
|
||||
set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
|
||||
}
|
||||
|
||||
static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
|
||||
{
|
||||
gfp_t gfp = GFP_KERNEL_ACCOUNT;
|
||||
|
||||
if (mm == &init_mm)
|
||||
gfp &= ~__GFP_ACCOUNT;
|
||||
return (p4d_t *)get_zeroed_page(gfp);
|
||||
}
|
||||
|
||||
static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
|
||||
{
|
||||
BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
|
||||
free_page((unsigned long)p4d);
|
||||
}
|
||||
|
||||
extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d);
|
||||
|
||||
static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
|
||||
unsigned long address)
|
||||
{
|
||||
___p4d_free_tlb(tlb, p4d);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
|
||||
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
|
||||
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
typedef unsigned long pteval_t;
|
||||
typedef unsigned long pmdval_t;
|
||||
typedef unsigned long pudval_t;
|
||||
typedef unsigned long p4dval_t;
|
||||
typedef unsigned long pgdval_t;
|
||||
typedef unsigned long pgprotval_t;
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
typedef u64 pteval_t;
|
||||
typedef u64 pmdval_t;
|
||||
typedef u64 pudval_t;
|
||||
typedef u64 p4dval_t;
|
||||
typedef u64 pgdval_t;
|
||||
typedef u64 pgprotval_t;
|
||||
|
||||
|
|
|
@ -51,11 +51,19 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
|
|||
|
||||
#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd)
|
||||
|
||||
#ifndef __PAGETABLE_PUD_FOLDED
|
||||
#ifndef __PAGETABLE_P4D_FOLDED
|
||||
#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd)
|
||||
#define pgd_clear(pgd) native_pgd_clear(pgd)
|
||||
#endif
|
||||
|
||||
#ifndef set_p4d
|
||||
# define set_p4d(p4dp, p4d) native_set_p4d(p4dp, p4d)
|
||||
#endif
|
||||
|
||||
#ifndef __PAGETABLE_PUD_FOLDED
|
||||
#define p4d_clear(p4d) native_p4d_clear(p4d)
|
||||
#endif
|
||||
|
||||
#ifndef set_pud
|
||||
# define set_pud(pudp, pud) native_set_pud(pudp, pud)
|
||||
#endif
|
||||
|
@ -72,6 +80,11 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
|
|||
#define pgd_val(x) native_pgd_val(x)
|
||||
#define __pgd(x) native_make_pgd(x)
|
||||
|
||||
#ifndef __PAGETABLE_P4D_FOLDED
|
||||
#define p4d_val(x) native_p4d_val(x)
|
||||
#define __p4d(x) native_make_p4d(x)
|
||||
#endif
|
||||
|
||||
#ifndef __PAGETABLE_PUD_FOLDED
|
||||
#define pud_val(x) native_pud_val(x)
|
||||
#define __pud(x) native_make_pud(x)
|
||||
|
@ -177,6 +190,17 @@ static inline unsigned long pud_pfn(pud_t pud)
|
|||
return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
static inline unsigned long p4d_pfn(p4d_t p4d)
|
||||
{
|
||||
return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
static inline int p4d_large(p4d_t p4d)
|
||||
{
|
||||
/* No 512 GiB pages yet */
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
|
||||
|
||||
static inline int pmd_large(pmd_t pte)
|
||||
|
@ -536,6 +560,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
|
|||
#define pte_pgprot(x) __pgprot(pte_flags(x))
|
||||
#define pmd_pgprot(x) __pgprot(pmd_flags(x))
|
||||
#define pud_pgprot(x) __pgprot(pud_flags(x))
|
||||
#define p4d_pgprot(x) __pgprot(p4d_flags(x))
|
||||
|
||||
#define canon_pgprot(p) __pgprot(massage_pgprot(p))
|
||||
|
||||
|
@ -585,6 +610,7 @@ pte_t *populate_extra_pte(unsigned long vaddr);
|
|||
#include <linux/mm_types.h>
|
||||
#include <linux/mmdebug.h>
|
||||
#include <linux/log2.h>
|
||||
#include <asm/fixmap.h>
|
||||
|
||||
static inline int pte_none(pte_t pte)
|
||||
{
|
||||
|
@ -768,7 +794,52 @@ static inline int pud_large(pud_t pud)
|
|||
}
|
||||
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
|
||||
|
||||
static inline unsigned long pud_index(unsigned long address)
|
||||
{
|
||||
return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
|
||||
}
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 3
|
||||
/*
 * A p4d entry is empty when its raw value has no bit set outside
 * _PAGE_KNL_ERRATUM_MASK.
 */
static inline int p4d_none(p4d_t p4d)
{
	p4dval_t meaningful = native_p4d_val(p4d) & ~(_PAGE_KNL_ERRATUM_MASK);

	return !meaningful;
}
|
||||
|
||||
/* Non-zero when the _PAGE_PRESENT flag is set in the p4d entry. */
static inline int p4d_present(p4d_t p4d)
{
	p4dval_t flags = p4d_flags(p4d);

	return flags & _PAGE_PRESENT;
}
|
||||
|
||||
/*
 * Kernel virtual address of the table a p4d entry points to: the
 * physical address bits of the entry, translated with __va().
 */
static inline unsigned long p4d_page_vaddr(p4d_t p4d)
{
	p4dval_t table_phys = p4d_val(p4d) & p4d_pfn_mask(p4d);

	return (unsigned long)__va(table_phys);
}
|
||||
|
||||
/*
|
||||
* Currently stuck as a macro due to indirect forward reference to
|
||||
* linux/mmzone.h's __section_mem_map_addr() definition:
|
||||
*/
|
||||
#define p4d_page(p4d) \
|
||||
pfn_to_page((p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT)
|
||||
|
||||
/* Find an entry in the third-level page table.. */
|
||||
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
|
||||
{
|
||||
return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
|
||||
}
|
||||
|
||||
/*
 * A p4d entry is bad when it carries any flag other than those in
 * _KERNPG_TABLE or _PAGE_USER.
 */
static inline int p4d_bad(p4d_t p4d)
{
	p4dval_t stray = p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER);

	return stray != 0;
}
|
||||
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
|
||||
|
||||
static inline unsigned long p4d_index(unsigned long address)
|
||||
{
|
||||
return (address >> P4D_SHIFT) & (PTRS_PER_P4D - 1);
|
||||
}
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 4
|
||||
static inline int pgd_present(pgd_t pgd)
|
||||
{
|
||||
return pgd_flags(pgd) & _PAGE_PRESENT;
|
||||
|
@ -786,14 +857,9 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
|
|||
#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
|
||||
|
||||
/* to find an entry in a page-table-directory. */
|
||||
static inline unsigned long pud_index(unsigned long address)
|
||||
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
|
||||
{
|
||||
return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
|
||||
}
|
||||
|
||||
static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
|
||||
{
|
||||
return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address);
|
||||
return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address);
|
||||
}
|
||||
|
||||
static inline int pgd_bad(pgd_t pgd)
|
||||
|
@ -811,7 +877,7 @@ static inline int pgd_none(pgd_t pgd)
|
|||
*/
|
||||
return !native_pgd_val(pgd);
|
||||
}
|
||||
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
|
||||
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
|
|
|
@ -14,7 +14,6 @@
|
|||
*/
|
||||
#ifndef __ASSEMBLY__
|
||||
#include <asm/processor.h>
|
||||
#include <asm/fixmap.h>
|
||||
#include <linux/threads.h>
|
||||
#include <asm/paravirt.h>
|
||||
|
||||
|
|
|
@ -35,15 +35,22 @@ extern void paging_init(void);
|
|||
#define pud_ERROR(e) \
|
||||
pr_err("%s:%d: bad pud %p(%016lx)\n", \
|
||||
__FILE__, __LINE__, &(e), pud_val(e))
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS >= 5
|
||||
#define p4d_ERROR(e) \
|
||||
pr_err("%s:%d: bad p4d %p(%016lx)\n", \
|
||||
__FILE__, __LINE__, &(e), p4d_val(e))
|
||||
#endif
|
||||
|
||||
#define pgd_ERROR(e) \
|
||||
pr_err("%s:%d: bad pgd %p(%016lx)\n", \
|
||||
__FILE__, __LINE__, &(e), pgd_val(e))
|
||||
|
||||
struct mm_struct;
|
||||
|
||||
void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte);
|
||||
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
|
||||
|
||||
|
||||
static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep)
|
||||
{
|
||||
|
@ -121,6 +128,20 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
|
|||
#endif
|
||||
}
|
||||
|
||||
/* Native (non-paravirt) p4d update: a plain store of @p4d into *@p4dp. */
static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
{
	p4dp[0] = p4d;
}
|
||||
|
||||
/*
 * Native (non-paravirt) p4d clear: store an all-zero entry through
 * native_set_p4d().  With CONFIG_X86_5LEVEL the zero entry is built by
 * native_make_p4d(); otherwise it is built from a zero pgd placed in
 * the p4d's .pgd member.
 */
static inline void native_p4d_clear(p4d_t *p4d)
{
#ifdef CONFIG_X86_5LEVEL
	p4d_t cleared = native_make_p4d(0);
#else
	p4d_t cleared = { .pgd = native_make_pgd(0) };
#endif

	native_set_p4d(p4d, cleared);
}
|
||||
|
||||
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
|
||||
{
|
||||
*pgdp = pgd;
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
typedef unsigned long pteval_t;
|
||||
typedef unsigned long pmdval_t;
|
||||
typedef unsigned long pudval_t;
|
||||
typedef unsigned long p4dval_t;
|
||||
typedef unsigned long pgdval_t;
|
||||
typedef unsigned long pgprotval_t;
|
||||
|
||||
|
@ -22,12 +23,32 @@ typedef struct { pteval_t pte; } pte_t;
|
|||
|
||||
#define SHARED_KERNEL_PMD 0
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
|
||||
/*
|
||||
* PGDIR_SHIFT determines what a top-level page table entry can map
|
||||
*/
|
||||
#define PGDIR_SHIFT 48
|
||||
#define PTRS_PER_PGD 512
|
||||
|
||||
/*
|
||||
* 4th level page in 5-level paging case
|
||||
*/
|
||||
#define P4D_SHIFT 39
|
||||
#define PTRS_PER_P4D 512
|
||||
#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
|
||||
#define P4D_MASK (~(P4D_SIZE - 1))
|
||||
|
||||
#else /* CONFIG_X86_5LEVEL */
|
||||
|
||||
/*
|
||||
* PGDIR_SHIFT determines what a top-level page table entry can map
|
||||
*/
|
||||
#define PGDIR_SHIFT 39
|
||||
#define PTRS_PER_PGD 512
|
||||
|
||||
#endif /* CONFIG_X86_5LEVEL */
|
||||
|
||||
/*
|
||||
* 3rd level page
|
||||
*/
|
||||
|
@ -55,9 +76,15 @@ typedef struct { pteval_t pte; } pte_t;
|
|||
|
||||
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
|
||||
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
#define VMALLOC_SIZE_TB _AC(16384, UL)
|
||||
#define __VMALLOC_BASE _AC(0xff92000000000000, UL)
|
||||
#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
|
||||
#else
|
||||
#define VMALLOC_SIZE_TB _AC(32, UL)
|
||||
#define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
|
||||
#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
|
||||
#endif
|
||||
#ifdef CONFIG_RANDOMIZE_MEMORY
|
||||
#define VMALLOC_START vmalloc_base
|
||||
#define VMEMMAP_START vmemmap_base
|
||||
|
@ -67,10 +94,11 @@ typedef struct { pteval_t pte; } pte_t;
|
|||
#endif /* CONFIG_RANDOMIZE_MEMORY */
|
||||
#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
|
||||
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
|
||||
#define MODULES_END _AC(0xffffffffff000000, UL)
|
||||
/* The module section ends with the start of the fixmap */
|
||||
#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
|
||||
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
|
||||
#define ESPFIX_PGD_ENTRY _AC(-2, UL)
|
||||
#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)
|
||||
#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
|
||||
#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
|
||||
#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
|
||||
|
||||
|
|
|
@ -272,9 +272,28 @@ static inline pgdval_t pgd_flags(pgd_t pgd)
|
|||
return native_pgd_val(pgd) & PTE_FLAGS_MASK;
|
||||
}
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 3
|
||||
#include <asm-generic/5level-fixup.h>
|
||||
#if CONFIG_PGTABLE_LEVELS > 4
|
||||
typedef struct { p4dval_t p4d; } p4d_t;
|
||||
|
||||
static inline p4d_t native_make_p4d(pudval_t val)
|
||||
{
|
||||
return (p4d_t) { val };
|
||||
}
|
||||
|
||||
/* Raw value stored in a p4d entry. */
static inline p4dval_t native_p4d_val(p4d_t p4d)
{
	p4dval_t raw = p4d.p4d;

	return raw;
}
|
||||
#else
|
||||
#include <asm-generic/pgtable-nop4d.h>
|
||||
|
||||
/*
 * Raw value of a p4d entry in the pgtable-nop4d.h case: the value of
 * the pgd held in its .pgd member.
 */
static inline p4dval_t native_p4d_val(p4d_t p4d)
{
	pgd_t folded = p4d.pgd;

	return native_pgd_val(folded);
}
|
||||
#endif
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 3
|
||||
typedef struct { pudval_t pud; } pud_t;
|
||||
|
||||
static inline pud_t native_make_pud(pmdval_t val)
|
||||
|
@ -287,12 +306,11 @@ static inline pudval_t native_pud_val(pud_t pud)
|
|||
return pud.pud;
|
||||
}
|
||||
#else
|
||||
#define __ARCH_USE_5LEVEL_HACK
|
||||
#include <asm-generic/pgtable-nopud.h>
|
||||
|
||||
static inline pudval_t native_pud_val(pud_t pud)
|
||||
{
|
||||
return native_pgd_val(pud.pgd);
|
||||
return native_pgd_val(pud.p4d.pgd);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -309,15 +327,30 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
|
|||
return pmd.pmd;
|
||||
}
|
||||
#else
|
||||
#define __ARCH_USE_5LEVEL_HACK
|
||||
#include <asm-generic/pgtable-nopmd.h>
|
||||
|
||||
static inline pmdval_t native_pmd_val(pmd_t pmd)
|
||||
{
|
||||
return native_pgd_val(pmd.pud.pgd);
|
||||
return native_pgd_val(pmd.pud.p4d.pgd);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Mask selecting the page-frame bits of a p4d entry.  There are no
 * 512 GiB huge pages yet, so PTE_PFN_MASK is always used.
 */
static inline p4dval_t p4d_pfn_mask(p4d_t p4d)
{
	return PTE_PFN_MASK;
}
|
||||
|
||||
/* Mask selecting the flag bits of a p4d entry: the inverse of its pfn mask. */
static inline p4dval_t p4d_flags_mask(p4d_t p4d)
{
	p4dval_t pfn_bits = p4d_pfn_mask(p4d);

	return ~pfn_bits;
}
|
||||
|
||||
/* Flag bits of a p4d entry: its raw value restricted to p4d_flags_mask(). */
static inline p4dval_t p4d_flags(p4d_t p4d)
{
	p4dval_t raw = native_p4d_val(p4d);

	return raw & p4d_flags_mask(p4d);
}
|
||||
|
||||
static inline pudval_t pud_pfn_mask(pud_t pud)
|
||||
{
|
||||
if (native_pud_val(pud) & _PAGE_PSE)
|
||||
|
@ -461,6 +494,7 @@ enum pg_level {
|
|||
PG_LEVEL_4K,
|
||||
PG_LEVEL_2M,
|
||||
PG_LEVEL_1G,
|
||||
PG_LEVEL_512G,
|
||||
PG_LEVEL_NUM
|
||||
};
|
||||
|
||||
|
|
|
@ -709,6 +709,8 @@ extern struct desc_ptr early_gdt_descr;
|
|||
|
||||
extern void cpu_set_gdt(int);
|
||||
extern void switch_to_new_gdt(int);
|
||||
extern void load_direct_gdt(int);
|
||||
extern void load_fixmap_gdt(int);
|
||||
extern void load_percpu_segment(int);
|
||||
extern void cpu_init(void);
|
||||
|
||||
|
@ -790,6 +792,7 @@ static inline void spin_lock_prefetch(const void *x)
|
|||
/*
|
||||
* User space process size: 3GB (default).
|
||||
*/
|
||||
#define IA32_PAGE_OFFSET PAGE_OFFSET
|
||||
#define TASK_SIZE PAGE_OFFSET
|
||||
#define TASK_SIZE_MAX TASK_SIZE
|
||||
#define STACK_TOP TASK_SIZE
|
||||
|
@ -866,7 +869,8 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
|
|||
* This decides where the kernel will search for a free chunk of vm
|
||||
* space during mmap's.
|
||||
*/
|
||||
#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3))
|
||||
#define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3))
|
||||
#define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE)
|
||||
|
||||
#define KSTK_EIP(task) (task_pt_regs(task)->ip)
|
||||
|
||||
|
|
|
@ -53,6 +53,12 @@
|
|||
# define NEED_MOVBE 0
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31))
|
||||
#else
|
||||
# define NEED_LA57 0
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
/* Paravirtualized systems may not have PSE or PGE available */
|
||||
|
@ -98,7 +104,7 @@
|
|||
#define REQUIRED_MASK13 0
|
||||
#define REQUIRED_MASK14 0
|
||||
#define REQUIRED_MASK15 0
|
||||
#define REQUIRED_MASK16 0
|
||||
#define REQUIRED_MASK16 (NEED_LA57)
|
||||
#define REQUIRED_MASK17 0
|
||||
#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
|
||||
|
||||
|
|
|
@ -26,8 +26,13 @@
|
|||
# endif
|
||||
#else /* CONFIG_X86_32 */
|
||||
# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
|
||||
# define MAX_PHYSADDR_BITS 44
|
||||
# define MAX_PHYSMEM_BITS 46
|
||||
# ifdef CONFIG_X86_5LEVEL
|
||||
# define MAX_PHYSADDR_BITS 52
|
||||
# define MAX_PHYSMEM_BITS 52
|
||||
# else
|
||||
# define MAX_PHYSADDR_BITS 44
|
||||
# define MAX_PHYSMEM_BITS 46
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_SPARSEMEM */
|
||||
|
|
|
@ -87,7 +87,7 @@ static inline void setup_stack_canary_segment(int cpu)
|
|||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
|
||||
struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
|
||||
struct desc_struct *gdt_table = get_cpu_gdt_rw(cpu);
|
||||
struct desc_struct desc;
|
||||
|
||||
desc = gdt_table[GDT_ENTRY_STACK_CANARY];
|
||||
|
|
|
@ -215,7 +215,6 @@ static inline void __flush_tlb_one(unsigned long addr)
|
|||
/*
|
||||
* TLB flushing:
|
||||
*
|
||||
* - flush_tlb() flushes the current mm struct TLBs
|
||||
* - flush_tlb_all() flushes all processes TLBs
|
||||
* - flush_tlb_mm(mm) flushes the specified mm context TLB's
|
||||
* - flush_tlb_page(vma, vmaddr) flushes one page
|
||||
|
@ -247,11 +246,6 @@ static inline void flush_tlb_all(void)
|
|||
__flush_tlb_all();
|
||||
}
|
||||
|
||||
static inline void flush_tlb(void)
|
||||
{
|
||||
__flush_tlb_up();
|
||||
}
|
||||
|
||||
static inline void local_flush_tlb(void)
|
||||
{
|
||||
__flush_tlb_up();
|
||||
|
@ -313,14 +307,11 @@ static inline void flush_tlb_kernel_range(unsigned long start,
|
|||
flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
|
||||
|
||||
extern void flush_tlb_all(void);
|
||||
extern void flush_tlb_current_task(void);
|
||||
extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
|
||||
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
|
||||
unsigned long end, unsigned long vmflag);
|
||||
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
|
||||
|
||||
#define flush_tlb() flush_tlb_current_task()
|
||||
|
||||
void native_flush_tlb_others(const struct cpumask *cpumask,
|
||||
struct mm_struct *mm,
|
||||
unsigned long start, unsigned long end);
|
||||
|
|
|
@ -280,13 +280,17 @@ static inline pte_t __pte_ma(pteval_t x)
|
|||
|
||||
#define pmd_val_ma(v) ((v).pmd)
|
||||
#ifdef __PAGETABLE_PUD_FOLDED
|
||||
#define pud_val_ma(v) ((v).pgd.pgd)
|
||||
#define pud_val_ma(v) ((v).p4d.pgd.pgd)
|
||||
#else
|
||||
#define pud_val_ma(v) ((v).pud)
|
||||
#endif
|
||||
#define __pmd_ma(x) ((pmd_t) { (x) } )
|
||||
|
||||
#define pgd_val_ma(x) ((x).pgd)
|
||||
#ifdef __PAGETABLE_P4D_FOLDED
|
||||
#define p4d_val_ma(x) ((x).pgd.pgd)
|
||||
#else
|
||||
#define p4d_val_ma(x) ((x).p4d)
|
||||
#endif
|
||||
|
||||
void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid);
|
||||
|
||||
|
|
|
@ -101,7 +101,7 @@ int x86_acpi_suspend_lowlevel(void)
|
|||
#ifdef CONFIG_SMP
|
||||
initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
|
||||
early_gdt_descr.address =
|
||||
(unsigned long)get_cpu_gdt_table(smp_processor_id());
|
||||
(unsigned long)get_cpu_gdt_rw(smp_processor_id());
|
||||
initial_gs = per_cpu_offset(smp_processor_id());
|
||||
#endif
|
||||
initial_code = (unsigned long)wakeup_long64;
|
||||
|
|
|
@ -609,7 +609,7 @@ static long __apm_bios_call(void *_call)
|
|||
|
||||
cpu = get_cpu();
|
||||
BUG_ON(cpu != 0);
|
||||
gdt = get_cpu_gdt_table(cpu);
|
||||
gdt = get_cpu_gdt_rw(cpu);
|
||||
save_desc_40 = gdt[0x40 / 8];
|
||||
gdt[0x40 / 8] = bad_bios_desc;
|
||||
|
||||
|
@ -685,7 +685,7 @@ static long __apm_bios_call_simple(void *_call)
|
|||
|
||||
cpu = get_cpu();
|
||||
BUG_ON(cpu != 0);
|
||||
gdt = get_cpu_gdt_table(cpu);
|
||||
gdt = get_cpu_gdt_rw(cpu);
|
||||
save_desc_40 = gdt[0x40 / 8];
|
||||
gdt[0x40 / 8] = bad_bios_desc;
|
||||
|
||||
|
@ -2352,7 +2352,7 @@ static int __init apm_init(void)
|
|||
* Note we only set APM segments on CPU zero, since we pin the APM
|
||||
* code to that CPU.
|
||||
*/
|
||||
gdt = get_cpu_gdt_table(0);
|
||||
gdt = get_cpu_gdt_rw(0);
|
||||
set_desc_base(&gdt[APM_CS >> 3],
|
||||
(unsigned long)__va((unsigned long)apm_info.bios.cseg << 4));
|
||||
set_desc_base(&gdt[APM_CS_16 >> 3],
|
||||
|
|
|
@ -448,19 +448,60 @@ void load_percpu_segment(int cpu)
|
|||
load_stack_canary_segment();
|
||||
}
|
||||
|
||||
/* Setup the fixmap mapping only once per-processor */
|
||||
static inline void setup_fixmap_gdt(int cpu)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
/* On 64-bit systems, we use a read-only fixmap GDT. */
|
||||
pgprot_t prot = PAGE_KERNEL_RO;
|
||||
#else
|
||||
/*
|
||||
* On native 32-bit systems, the GDT cannot be read-only because
|
||||
* our double fault handler uses a task gate, and entering through
|
||||
* a task gate needs to change an available TSS to busy. If the GDT
|
||||
* is read-only, that will triple fault.
|
||||
*
|
||||
* On Xen PV, the GDT must be read-only because the hypervisor requires
|
||||
* it.
|
||||
*/
|
||||
pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
|
||||
PAGE_KERNEL_RO : PAGE_KERNEL;
|
||||
#endif
|
||||
|
||||
__set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
|
||||
}
|
||||
|
||||
/* Load the original GDT from the per-cpu structure */
|
||||
void load_direct_gdt(int cpu)
|
||||
{
|
||||
struct desc_ptr gdt_descr;
|
||||
|
||||
gdt_descr.address = (long)get_cpu_gdt_rw(cpu);
|
||||
gdt_descr.size = GDT_SIZE - 1;
|
||||
load_gdt(&gdt_descr);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(load_direct_gdt);
|
||||
|
||||
/* Load a fixmap remapping of the per-cpu GDT */
|
||||
void load_fixmap_gdt(int cpu)
|
||||
{
|
||||
struct desc_ptr gdt_descr;
|
||||
|
||||
gdt_descr.address = (long)get_cpu_gdt_ro(cpu);
|
||||
gdt_descr.size = GDT_SIZE - 1;
|
||||
load_gdt(&gdt_descr);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(load_fixmap_gdt);
|
||||
|
||||
/*
|
||||
* Current gdt points %fs at the "master" per-cpu area: after this,
|
||||
* it's on the real one.
|
||||
*/
|
||||
void switch_to_new_gdt(int cpu)
|
||||
{
|
||||
struct desc_ptr gdt_descr;
|
||||
|
||||
gdt_descr.address = (long)get_cpu_gdt_table(cpu);
|
||||
gdt_descr.size = GDT_SIZE - 1;
|
||||
load_gdt(&gdt_descr);
|
||||
/* Load the original GDT */
|
||||
load_direct_gdt(cpu);
|
||||
/* Reload the per-cpu base */
|
||||
|
||||
load_percpu_segment(cpu);
|
||||
}
|
||||
|
||||
|
@ -1526,6 +1567,9 @@ void cpu_init(void)
|
|||
|
||||
if (is_uv_system())
|
||||
uv_cpu_init();
|
||||
|
||||
setup_fixmap_gdt(cpu);
|
||||
load_fixmap_gdt(cpu);
|
||||
}
|
||||
|
||||
#else
|
||||
|
@ -1581,6 +1625,9 @@ void cpu_init(void)
|
|||
dbg_restore_debug_regs();
|
||||
|
||||
fpu__init_cpu();
|
||||
|
||||
setup_fixmap_gdt(cpu);
|
||||
load_fixmap_gdt(cpu);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -270,7 +270,6 @@ int __init e820__update_table(struct e820_table *table)
|
|||
if (table->nr_entries < 2)
|
||||
return -1;
|
||||
|
||||
table->nr_entries = table->nr_entries;
|
||||
BUG_ON(table->nr_entries > max_nr_entries);
|
||||
|
||||
/* Bail out if we find any unreasonable addresses in the map: */
|
||||
|
|
|
@ -50,11 +50,11 @@
|
|||
#define ESPFIX_STACKS_PER_PAGE (PAGE_SIZE/ESPFIX_STACK_SIZE)
|
||||
|
||||
/* There is address space for how many espfix pages? */
|
||||
#define ESPFIX_PAGE_SPACE (1UL << (PGDIR_SHIFT-PAGE_SHIFT-16))
|
||||
#define ESPFIX_PAGE_SPACE (1UL << (P4D_SHIFT-PAGE_SHIFT-16))
|
||||
|
||||
#define ESPFIX_MAX_CPUS (ESPFIX_STACKS_PER_PAGE * ESPFIX_PAGE_SPACE)
|
||||
#if CONFIG_NR_CPUS > ESPFIX_MAX_CPUS
|
||||
# error "Need more than one PGD for the ESPFIX hack"
|
||||
# error "Need more virtual address space for the ESPFIX hack"
|
||||
#endif
|
||||
|
||||
#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
|
||||
|
@ -121,11 +121,13 @@ static void init_espfix_random(void)
|
|||
|
||||
void __init init_espfix_bsp(void)
|
||||
{
|
||||
pgd_t *pgd_p;
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
|
||||
/* Install the espfix pud into the kernel page directory */
|
||||
pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
|
||||
pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
|
||||
pgd = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
|
||||
p4d = p4d_alloc(&init_mm, pgd, ESPFIX_BASE_ADDR);
|
||||
p4d_populate(&init_mm, p4d, espfix_pud_page);
|
||||
|
||||
/* Randomize the locations */
|
||||
init_espfix_random();
|
||||
|
|
|
@ -103,6 +103,7 @@ static void machine_kexec_page_table_set_one(
|
|||
pgd_t *pgd, pmd_t *pmd, pte_t *pte,
|
||||
unsigned long vaddr, unsigned long paddr)
|
||||
{
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
|
||||
pgd += pgd_index(vaddr);
|
||||
|
@ -110,7 +111,8 @@ static void machine_kexec_page_table_set_one(
|
|||
if (!(pgd_val(*pgd) & _PAGE_PRESENT))
|
||||
set_pgd(pgd, __pgd(__pa(pmd) | _PAGE_PRESENT));
|
||||
#endif
|
||||
pud = pud_offset(pgd, vaddr);
|
||||
p4d = p4d_offset(pgd, vaddr);
|
||||
pud = pud_offset(p4d, vaddr);
|
||||
pmd = pmd_offset(pud, vaddr);
|
||||
if (!(pmd_val(*pmd) & _PAGE_PRESENT))
|
||||
set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
|
||||
|
|
|
@ -36,6 +36,7 @@ static struct kexec_file_ops *kexec_file_loaders[] = {
|
|||
|
||||
static void free_transition_pgtable(struct kimage *image)
|
||||
{
|
||||
free_page((unsigned long)image->arch.p4d);
|
||||
free_page((unsigned long)image->arch.pud);
|
||||
free_page((unsigned long)image->arch.pmd);
|
||||
free_page((unsigned long)image->arch.pte);
|
||||
|
@ -43,6 +44,7 @@ static void free_transition_pgtable(struct kimage *image)
|
|||
|
||||
static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
|
||||
{
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
@ -53,13 +55,21 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
|
|||
paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
|
||||
pgd += pgd_index(vaddr);
|
||||
if (!pgd_present(*pgd)) {
|
||||
p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL);
|
||||
if (!p4d)
|
||||
goto err;
|
||||
image->arch.p4d = p4d;
|
||||
set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
|
||||
}
|
||||
p4d = p4d_offset(pgd, vaddr);
|
||||
if (!p4d_present(*p4d)) {
|
||||
pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
|
||||
if (!pud)
|
||||
goto err;
|
||||
image->arch.pud = pud;
|
||||
set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
|
||||
set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
|
||||
}
|
||||
pud = pud_offset(pgd, vaddr);
|
||||
pud = pud_offset(p4d, vaddr);
|
||||
if (!pud_present(*pud)) {
|
||||
pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
|
||||
if (!pmd)
|
||||
|
|
|
@ -405,9 +405,11 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
|
|||
.alloc_pte = paravirt_nop,
|
||||
.alloc_pmd = paravirt_nop,
|
||||
.alloc_pud = paravirt_nop,
|
||||
.alloc_p4d = paravirt_nop,
|
||||
.release_pte = paravirt_nop,
|
||||
.release_pmd = paravirt_nop,
|
||||
.release_pud = paravirt_nop,
|
||||
.release_p4d = paravirt_nop,
|
||||
|
||||
.set_pte = native_set_pte,
|
||||
.set_pte_at = native_set_pte_at,
|
||||
|
@ -430,12 +432,19 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
|
|||
.pmd_val = PTE_IDENT,
|
||||
.make_pmd = PTE_IDENT,
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS == 4
|
||||
#if CONFIG_PGTABLE_LEVELS >= 4
|
||||
.pud_val = PTE_IDENT,
|
||||
.make_pud = PTE_IDENT,
|
||||
|
||||
.set_p4d = native_set_p4d,
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS >= 5
|
||||
.p4d_val = PTE_IDENT,
|
||||
.make_p4d = PTE_IDENT,
|
||||
|
||||
.set_pgd = native_set_pgd,
|
||||
#endif
|
||||
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
|
||||
#endif /* CONFIG_PGTABLE_LEVELS >= 4 */
|
||||
#endif /* CONFIG_PGTABLE_LEVELS >= 3 */
|
||||
|
||||
.pte_val = PTE_IDENT,
|
||||
|
|
|
@ -53,6 +53,11 @@
|
|||
#include <asm/xen/hypervisor.h>
|
||||
#include <asm/vdso.h>
|
||||
#include <asm/intel_rdt.h>
|
||||
#include <asm/unistd.h>
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
/* Not included via unistd.h */
|
||||
#include <asm/unistd_32_ia32.h>
|
||||
#endif
|
||||
|
||||
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
|
||||
|
||||
|
@ -494,6 +499,8 @@ void set_personality_64bit(void)
|
|||
clear_thread_flag(TIF_IA32);
|
||||
clear_thread_flag(TIF_ADDR32);
|
||||
clear_thread_flag(TIF_X32);
|
||||
/* Pretend that this comes from a 64bit execve */
|
||||
task_pt_regs(current)->orig_ax = __NR_execve;
|
||||
|
||||
/* Ensure the corresponding mm is not marked. */
|
||||
if (current->mm)
|
||||
|
@ -506,32 +513,50 @@ void set_personality_64bit(void)
|
|||
current->personality &= ~READ_IMPLIES_EXEC;
|
||||
}
|
||||
|
||||
/*
 * Switch the current task to the x32 personality.  Compiles to an empty
 * function unless CONFIG_X86_X32 is enabled.
 */
static void __set_personality_x32(void)
{
#ifdef CONFIG_X86_X32
	struct mm_struct *mm;

	clear_thread_flag(TIF_IA32);
	set_thread_flag(TIF_X32);

	mm = current->mm;
	if (mm)
		mm->context.ia32_compat = TIF_X32;

	current->personality &= ~READ_IMPLIES_EXEC;

	/*
	 * Pretend to come from an x32 execve.
	 *
	 * in_compat_syscall() decides compat status from the presence of
	 * the x32 syscall bit, and the x86 mmap() code relies on the
	 * syscall bitness.  Setting the bit here makes in_compat_syscall()
	 * work during exec().
	 */
	task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT;
	current->thread.status &= ~TS_COMPAT;
#endif
}
|
||||
|
||||
/*
 * Switch the current task to the ia32 personality.  Compiles to an
 * empty function unless CONFIG_IA32_EMULATION is enabled.
 */
static void __set_personality_ia32(void)
{
#ifdef CONFIG_IA32_EMULATION
	struct mm_struct *mm;

	set_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_X32);

	mm = current->mm;
	if (mm)
		mm->context.ia32_compat = TIF_IA32;

	current->personality |= force_personality32;

	/* Set up the first "return" to user space as an ia32 execve. */
	task_pt_regs(current)->orig_ax = __NR_ia32_execve;
	current->thread.status |= TS_COMPAT;
#endif
}
|
||||
|
||||
void set_personality_ia32(bool x32)
|
||||
{
|
||||
/* inherit personality from parent */
|
||||
|
||||
/* Make sure to be in 32bit mode */
|
||||
set_thread_flag(TIF_ADDR32);
|
||||
|
||||
/* Mark the associated mm as containing 32-bit tasks. */
|
||||
if (x32) {
|
||||
clear_thread_flag(TIF_IA32);
|
||||
set_thread_flag(TIF_X32);
|
||||
if (current->mm)
|
||||
current->mm->context.ia32_compat = TIF_X32;
|
||||
current->personality &= ~READ_IMPLIES_EXEC;
|
||||
/* in_compat_syscall() uses the presence of the x32
|
||||
syscall bit flag to determine compat status */
|
||||
current->thread.status &= ~TS_COMPAT;
|
||||
} else {
|
||||
set_thread_flag(TIF_IA32);
|
||||
clear_thread_flag(TIF_X32);
|
||||
if (current->mm)
|
||||
current->mm->context.ia32_compat = TIF_IA32;
|
||||
current->personality |= force_personality32;
|
||||
/* Prepare the first "return" to user space */
|
||||
current->thread.status |= TS_COMPAT;
|
||||
}
|
||||
if (x32)
|
||||
__set_personality_x32();
|
||||
else
|
||||
__set_personality_ia32();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(set_personality_ia32);
|
||||
|
||||
|
|
|
@ -1225,21 +1225,6 @@ void __init setup_arch(char **cmdline_p)
|
|||
|
||||
kasan_init();
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/* sync back kernel address range */
|
||||
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
|
||||
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
|
||||
KERNEL_PGD_PTRS);
|
||||
|
||||
/*
|
||||
* sync back low identity map too. It is used for example
|
||||
* in the 32-bit EFI stub.
|
||||
*/
|
||||
clone_pgd_range(initial_page_table,
|
||||
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
|
||||
min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
|
||||
#endif
|
||||
|
||||
tboot_probe();
|
||||
|
||||
map_vsyscall();
|
||||
|
|
|
@ -160,7 +160,7 @@ static inline void setup_percpu_segment(int cpu)
|
|||
pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
|
||||
0x2 | DESCTYPE_S, 0x8);
|
||||
gdt.s = 1;
|
||||
write_gdt_entry(get_cpu_gdt_table(cpu),
|
||||
write_gdt_entry(get_cpu_gdt_rw(cpu),
|
||||
GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
|
||||
#endif
|
||||
}
|
||||
|
@ -288,4 +288,25 @@ void __init setup_per_cpu_areas(void)
|
|||
|
||||
/* Setup cpu initialized, callin, callout masks */
|
||||
setup_cpu_local_masks();
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* Sync back kernel address range. We want to make sure that
|
||||
* all kernel mappings, including percpu mappings, are available
|
||||
* in the smpboot asm. We can't reliably pick up percpu
|
||||
* mappings using vmalloc_fault(), because exception dispatch
|
||||
* needs percpu data.
|
||||
*/
|
||||
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
|
||||
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
|
||||
KERNEL_PGD_PTRS);
|
||||
|
||||
/*
|
||||
* sync back low identity map too. It is used for example
|
||||
* in the 32-bit EFI stub.
|
||||
*/
|
||||
clone_pgd_range(initial_page_table,
|
||||
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
|
||||
min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -983,7 +983,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
|
|||
unsigned long timeout;
|
||||
|
||||
idle->thread.sp = (unsigned long)task_pt_regs(idle);
|
||||
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
|
||||
early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
|
||||
initial_code = (unsigned long)start_secondary;
|
||||
initial_stack = idle->thread.sp;
|
||||
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
#include <linux/uaccess.h>
|
||||
#include <linux/elf.h>
|
||||
|
||||
#include <asm/elf.h>
|
||||
#include <asm/compat.h>
|
||||
#include <asm/ia32.h>
|
||||
#include <asm/syscalls.h>
|
||||
|
||||
|
@ -101,7 +103,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
|
|||
static void find_start_end(unsigned long flags, unsigned long *begin,
|
||||
unsigned long *end)
|
||||
{
|
||||
if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) {
|
||||
if (!in_compat_syscall() && (flags & MAP_32BIT)) {
|
||||
/* This is usually needed to map code in small
|
||||
model, so it needs to be in the first 31bit. Limit
|
||||
it to that. This means we need to move the
|
||||
|
@ -114,10 +116,11 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
|
|||
if (current->flags & PF_RANDOMIZE) {
|
||||
*begin = randomize_page(*begin, 0x02000000);
|
||||
}
|
||||
} else {
|
||||
*begin = current->mm->mmap_legacy_base;
|
||||
*end = TASK_SIZE;
|
||||
return;
|
||||
}
|
||||
|
||||
*begin = get_mmap_base(1);
|
||||
*end = in_compat_syscall() ? tasksize_32bit() : tasksize_64bit();
|
||||
}
|
||||
|
||||
unsigned long
|
||||
|
@ -176,7 +179,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
|
|||
return addr;
|
||||
|
||||
/* for MAP_32BIT mappings we force the legacy mmap base */
|
||||
if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT))
|
||||
if (!in_compat_syscall() && (flags & MAP_32BIT))
|
||||
goto bottomup;
|
||||
|
||||
/* requesting a specific address */
|
||||
|
@ -191,7 +194,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
|
|||
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
|
||||
info.length = len;
|
||||
info.low_limit = PAGE_SIZE;
|
||||
info.high_limit = mm->mmap_base;
|
||||
info.high_limit = get_mmap_base(0);
|
||||
info.align_mask = 0;
|
||||
info.align_offset = pgoff << PAGE_SHIFT;
|
||||
if (filp) {
|
||||
|
|
|
@ -118,12 +118,16 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
|
|||
pgprot_t prot)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
||||
pgd = pgd_offset(&tboot_mm, vaddr);
|
||||
pud = pud_alloc(&tboot_mm, pgd, vaddr);
|
||||
p4d = p4d_alloc(&tboot_mm, pgd, vaddr);
|
||||
if (!p4d)
|
||||
return -1;
|
||||
pud = pud_alloc(&tboot_mm, p4d, vaddr);
|
||||
if (!pud)
|
||||
return -1;
|
||||
pmd = pmd_alloc(&tboot_mm, pud, vaddr);
|
||||
|
|
|
@ -92,10 +92,17 @@ static void set_tls_desc(struct task_struct *p, int idx,
|
|||
cpu = get_cpu();
|
||||
|
||||
while (n-- > 0) {
|
||||
if (LDT_empty(info) || LDT_zero(info))
|
||||
if (LDT_empty(info) || LDT_zero(info)) {
|
||||
desc->a = desc->b = 0;
|
||||
else
|
||||
} else {
|
||||
fill_ldt(desc, info);
|
||||
|
||||
/*
|
||||
* Always set the accessed bit so that the CPU
|
||||
* doesn't try to write to the (read-only) GDT.
|
||||
*/
|
||||
desc->type |= 1;
|
||||
}
|
||||
++info;
|
||||
++desc;
|
||||
}
|
||||
|
|
|
@ -164,6 +164,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
|
|||
struct vm_area_struct *vma;
|
||||
spinlock_t *ptl;
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
@ -173,7 +174,10 @@ static void mark_screen_rdonly(struct mm_struct *mm)
|
|||
pgd = pgd_offset(mm, 0xA0000);
|
||||
if (pgd_none_or_clear_bad(pgd))
|
||||
goto out;
|
||||
pud = pud_offset(pgd, 0xA0000);
|
||||
p4d = p4d_offset(pgd, 0xA0000);
|
||||
if (p4d_none_or_clear_bad(p4d))
|
||||
goto out;
|
||||
pud = pud_offset(p4d, 0xA0000);
|
||||
if (pud_none_or_clear_bad(pud))
|
||||
goto out;
|
||||
pmd = pmd_offset(pud, 0xA0000);
|
||||
|
@ -193,7 +197,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
|
|||
pte_unmap_unlock(pte, ptl);
|
||||
out:
|
||||
up_write(&mm->mmap_sem);
|
||||
flush_tlb();
|
||||
flush_tlb_mm_range(mm, 0xA0000, 0xA0000 + 32*PAGE_SIZE, 0UL);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -741,7 +741,6 @@ static int svm_hardware_enable(void)
|
|||
|
||||
struct svm_cpu_data *sd;
|
||||
uint64_t efer;
|
||||
struct desc_ptr gdt_descr;
|
||||
struct desc_struct *gdt;
|
||||
int me = raw_smp_processor_id();
|
||||
|
||||
|
@ -763,8 +762,7 @@ static int svm_hardware_enable(void)
|
|||
sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
|
||||
sd->next_asid = sd->max_asid + 1;
|
||||
|
||||
native_store_gdt(&gdt_descr);
|
||||
gdt = (struct desc_struct *)gdt_descr.address;
|
||||
gdt = get_current_gdt_rw();
|
||||
sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
|
||||
|
||||
wrmsrl(MSR_EFER, efer | EFER_SVME);
|
||||
|
|
|
@ -935,7 +935,6 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
|
|||
* when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
|
||||
*/
|
||||
static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
|
||||
static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
|
||||
|
||||
/*
|
||||
* We maintain a per-CPU linked-list of vCPU, so in wakeup_handler() we
|
||||
|
@ -2057,14 +2056,13 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
|
|||
*/
|
||||
static unsigned long segment_base(u16 selector)
|
||||
{
|
||||
struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
|
||||
struct desc_struct *table;
|
||||
unsigned long v;
|
||||
|
||||
if (!(selector & ~SEGMENT_RPL_MASK))
|
||||
return 0;
|
||||
|
||||
table = (struct desc_struct *)gdt->address;
|
||||
table = get_current_gdt_ro();
|
||||
|
||||
if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
|
||||
u16 ldt_selector = kvm_read_ldt();
|
||||
|
@ -2169,7 +2167,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
|
|||
#endif
|
||||
if (vmx->host_state.msr_host_bndcfgs)
|
||||
wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
|
||||
load_gdt(this_cpu_ptr(&host_gdt));
|
||||
load_fixmap_gdt(raw_smp_processor_id());
|
||||
}
|
||||
|
||||
static void vmx_load_host_state(struct vcpu_vmx *vmx)
|
||||
|
@ -2271,7 +2269,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|||
}
|
||||
|
||||
if (!already_loaded) {
|
||||
struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
|
||||
void *gdt = get_current_gdt_ro();
|
||||
unsigned long sysenter_esp;
|
||||
|
||||
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
|
||||
|
@ -2282,7 +2280,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|||
*/
|
||||
vmcs_writel(HOST_TR_BASE,
|
||||
(unsigned long)this_cpu_ptr(&cpu_tss));
|
||||
vmcs_writel(HOST_GDTR_BASE, gdt->address);
|
||||
vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
|
||||
|
||||
/*
|
||||
* VM exits change the host TR limit to 0x67 after a VM
|
||||
|
@ -3471,8 +3469,6 @@ static int hardware_enable(void)
|
|||
ept_sync_global();
|
||||
}
|
||||
|
||||
native_store_gdt(this_cpu_ptr(&host_gdt));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -110,7 +110,8 @@ static struct addr_marker address_markers[] = {
|
|||
#define PTE_LEVEL_MULT (PAGE_SIZE)
|
||||
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
|
||||
#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
|
||||
#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
|
||||
#define P4D_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
|
||||
#define PGD_LEVEL_MULT (PTRS_PER_P4D * P4D_LEVEL_MULT)
|
||||
|
||||
#define pt_dump_seq_printf(m, to_dmesg, fmt, args...) \
|
||||
({ \
|
||||
|
@ -286,14 +287,13 @@ static void note_page(struct seq_file *m, struct pg_state *st,
|
|||
}
|
||||
}
|
||||
|
||||
static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
|
||||
unsigned long P)
|
||||
static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, unsigned long P)
|
||||
{
|
||||
int i;
|
||||
pte_t *start;
|
||||
pgprotval_t prot;
|
||||
|
||||
start = (pte_t *) pmd_page_vaddr(addr);
|
||||
start = (pte_t *)pmd_page_vaddr(addr);
|
||||
for (i = 0; i < PTRS_PER_PTE; i++) {
|
||||
prot = pte_flags(*start);
|
||||
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
|
||||
|
@ -304,14 +304,13 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
|
|||
|
||||
#if PTRS_PER_PMD > 1
|
||||
|
||||
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
|
||||
unsigned long P)
|
||||
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P)
|
||||
{
|
||||
int i;
|
||||
pmd_t *start;
|
||||
pgprotval_t prot;
|
||||
|
||||
start = (pmd_t *) pud_page_vaddr(addr);
|
||||
start = (pmd_t *)pud_page_vaddr(addr);
|
||||
for (i = 0; i < PTRS_PER_PMD; i++) {
|
||||
st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
|
||||
if (!pmd_none(*start)) {
|
||||
|
@ -347,15 +346,14 @@ static bool pud_already_checked(pud_t *prev_pud, pud_t *pud, bool checkwx)
|
|||
return checkwx && prev_pud && (pud_val(*prev_pud) == pud_val(*pud));
|
||||
}
|
||||
|
||||
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
|
||||
unsigned long P)
|
||||
static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, unsigned long P)
|
||||
{
|
||||
int i;
|
||||
pud_t *start;
|
||||
pgprotval_t prot;
|
||||
pud_t *prev_pud = NULL;
|
||||
|
||||
start = (pud_t *) pgd_page_vaddr(addr);
|
||||
start = (pud_t *)p4d_page_vaddr(addr);
|
||||
|
||||
for (i = 0; i < PTRS_PER_PUD; i++) {
|
||||
st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
|
||||
|
@ -377,9 +375,42 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
|
|||
}
|
||||
|
||||
#else
|
||||
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
|
||||
#define pgd_large(a) pud_large(__pud(pgd_val(a)))
|
||||
#define pgd_none(a) pud_none(__pud(pgd_val(a)))
|
||||
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(p4d_val(a)),p)
|
||||
#define p4d_large(a) pud_large(__pud(p4d_val(a)))
|
||||
#define p4d_none(a) pud_none(__pud(p4d_val(a)))
|
||||
#endif
|
||||
|
||||
#if PTRS_PER_P4D > 1
|
||||
|
||||
/*
 * Dump one p4d table: visit each of the PTRS_PER_P4D entries of the table
 * referenced by the pgd entry @addr, where @P is the address covered by the
 * first entry.  Empty, large and non-present entries are reported through
 * note_page() at level 2; every other entry is descended into with
 * walk_pud_level().
 */
static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P)
{
	p4d_t *entry = (p4d_t *)pgd_page_vaddr(addr);
	int idx;

	for (idx = 0; idx < PTRS_PER_P4D; idx++, entry++) {
		st->current_address = normalize_addr(P + idx * P4D_LEVEL_MULT);

		/* Nothing mapped here: record a hole. */
		if (p4d_none(*entry)) {
			note_page(m, st, __pgprot(0), 2);
			continue;
		}

		/* Leaf or non-present entry: report its flags, do not descend. */
		if (p4d_large(*entry) || !p4d_present(*entry)) {
			pgprotval_t prot = p4d_flags(*entry);

			note_page(m, st, __pgprot(prot), 2);
			continue;
		}

		walk_pud_level(m, st, *entry, P + idx * P4D_LEVEL_MULT);
	}
}
|
||||
|
||||
#else
|
||||
#define walk_p4d_level(m,s,a,p) walk_pud_level(m,s,__p4d(pgd_val(a)),p)
|
||||
#define pgd_large(a) p4d_large(__p4d(pgd_val(a)))
|
||||
#define pgd_none(a) p4d_none(__p4d(pgd_val(a)))
|
||||
#endif
|
||||
|
||||
static inline bool is_hypervisor_range(int idx)
|
||||
|
@ -424,7 +455,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
|
|||
prot = pgd_flags(*start);
|
||||
note_page(m, &st, __pgprot(prot), 1);
|
||||
} else {
|
||||
walk_pud_level(m, &st, *start,
|
||||
walk_p4d_level(m, &st, *start,
|
||||
i * PGD_LEVEL_MULT);
|
||||
}
|
||||
} else
|
||||
|
|
|
@ -253,6 +253,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
|
|||
{
|
||||
unsigned index = pgd_index(address);
|
||||
pgd_t *pgd_k;
|
||||
p4d_t *p4d, *p4d_k;
|
||||
pud_t *pud, *pud_k;
|
||||
pmd_t *pmd, *pmd_k;
|
||||
|
||||
|
@ -265,10 +266,15 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
|
|||
/*
|
||||
* set_pgd(pgd, *pgd_k); here would be useless on PAE
|
||||
* and redundant with the set_pmd() on non-PAE. As would
|
||||
* set_pud.
|
||||
* set_p4d/set_pud.
|
||||
*/
|
||||
pud = pud_offset(pgd, address);
|
||||
pud_k = pud_offset(pgd_k, address);
|
||||
p4d = p4d_offset(pgd, address);
|
||||
p4d_k = p4d_offset(pgd_k, address);
|
||||
if (!p4d_present(*p4d_k))
|
||||
return NULL;
|
||||
|
||||
pud = pud_offset(p4d, address);
|
||||
pud_k = pud_offset(p4d_k, address);
|
||||
if (!pud_present(*pud_k))
|
||||
return NULL;
|
||||
|
||||
|
@ -384,6 +390,8 @@ static void dump_pagetable(unsigned long address)
|
|||
{
|
||||
pgd_t *base = __va(read_cr3());
|
||||
pgd_t *pgd = &base[pgd_index(address)];
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
||||
|
@ -392,7 +400,9 @@ static void dump_pagetable(unsigned long address)
|
|||
if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
|
||||
goto out;
|
||||
#endif
|
||||
pmd = pmd_offset(pud_offset(pgd, address), address);
|
||||
p4d = p4d_offset(pgd, address);
|
||||
pud = pud_offset(p4d, address);
|
||||
pmd = pmd_offset(pud, address);
|
||||
printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
|
||||
|
||||
/*
|
||||
|
@ -425,6 +435,7 @@ void vmalloc_sync_all(void)
|
|||
static noinline int vmalloc_fault(unsigned long address)
|
||||
{
|
||||
pgd_t *pgd, *pgd_ref;
|
||||
p4d_t *p4d, *p4d_ref;
|
||||
pud_t *pud, *pud_ref;
|
||||
pmd_t *pmd, *pmd_ref;
|
||||
pte_t *pte, *pte_ref;
|
||||
|
@ -448,17 +459,37 @@ static noinline int vmalloc_fault(unsigned long address)
|
|||
if (pgd_none(*pgd)) {
|
||||
set_pgd(pgd, *pgd_ref);
|
||||
arch_flush_lazy_mmu_mode();
|
||||
} else {
|
||||
} else if (CONFIG_PGTABLE_LEVELS > 4) {
|
||||
/*
|
||||
* With folded p4d, pgd_none() is always false, so the pgd may
|
||||
* point to an empty page table entry and pgd_page_vaddr()
|
||||
* will return garbage.
|
||||
*
|
||||
* We will do the correct sanity check on the p4d level.
|
||||
*/
|
||||
BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
|
||||
}
|
||||
|
||||
/* With 4-level paging, copying happens on the p4d level. */
|
||||
p4d = p4d_offset(pgd, address);
|
||||
p4d_ref = p4d_offset(pgd_ref, address);
|
||||
if (p4d_none(*p4d_ref))
|
||||
return -1;
|
||||
|
||||
if (p4d_none(*p4d)) {
|
||||
set_p4d(p4d, *p4d_ref);
|
||||
arch_flush_lazy_mmu_mode();
|
||||
} else {
|
||||
BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_ref));
|
||||
}
|
||||
|
||||
/*
|
||||
* Below here mismatches are bugs because these lower tables
|
||||
* are shared:
|
||||
*/
|
||||
|
||||
pud = pud_offset(pgd, address);
|
||||
pud_ref = pud_offset(pgd_ref, address);
|
||||
pud = pud_offset(p4d, address);
|
||||
pud_ref = pud_offset(p4d_ref, address);
|
||||
if (pud_none(*pud_ref))
|
||||
return -1;
|
||||
|
||||
|
@ -526,6 +557,7 @@ static void dump_pagetable(unsigned long address)
|
|||
{
|
||||
pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
|
||||
pgd_t *pgd = base + pgd_index(address);
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
@ -538,7 +570,15 @@ static void dump_pagetable(unsigned long address)
|
|||
if (!pgd_present(*pgd))
|
||||
goto out;
|
||||
|
||||
pud = pud_offset(pgd, address);
|
||||
p4d = p4d_offset(pgd, address);
|
||||
if (bad_address(p4d))
|
||||
goto bad;
|
||||
|
||||
printk("P4D %lx ", p4d_val(*p4d));
|
||||
if (!p4d_present(*p4d) || p4d_large(*p4d))
|
||||
goto out;
|
||||
|
||||
pud = pud_offset(p4d, address);
|
||||
if (bad_address(pud))
|
||||
goto bad;
|
||||
|
||||
|
@ -1082,6 +1122,7 @@ static noinline int
|
|||
spurious_fault(unsigned long error_code, unsigned long address)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
@ -1104,7 +1145,14 @@ spurious_fault(unsigned long error_code, unsigned long address)
|
|||
if (!pgd_present(*pgd))
|
||||
return 0;
|
||||
|
||||
pud = pud_offset(pgd, address);
|
||||
p4d = p4d_offset(pgd, address);
|
||||
if (!p4d_present(*p4d))
|
||||
return 0;
|
||||
|
||||
if (p4d_large(*p4d))
|
||||
return spurious_fault_check(error_code, (pte_t *) p4d);
|
||||
|
||||
pud = pud_offset(p4d, address);
|
||||
if (!pud_present(*pud))
|
||||
return 0;
|
||||
|
||||
|
|
|
@ -76,9 +76,9 @@ static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
|
|||
}
|
||||
|
||||
/*
|
||||
* 'pteval' can come from a pte, pmd or pud. We only check
|
||||
* 'pteval' can come from a pte, pmd, pud or p4d. We only check
|
||||
* _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the
|
||||
* same value on all 3 types.
|
||||
* same value on all 4 types.
|
||||
*/
|
||||
static inline int pte_allows_gup(unsigned long pteval, int write)
|
||||
{
|
||||
|
@ -295,13 +295,13 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
|
|||
return 1;
|
||||
}
|
||||
|
||||
static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
|
||||
static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
|
||||
int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long next;
|
||||
pud_t *pudp;
|
||||
|
||||
pudp = pud_offset(&pgd, addr);
|
||||
pudp = pud_offset(&p4d, addr);
|
||||
do {
|
||||
pud_t pud = *pudp;
|
||||
|
||||
|
@ -320,6 +320,27 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
|
|||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Walk the p4d entries that cover [addr, end) below the pgd entry @pgd and
 * pass each populated one on to gup_pud_range().
 *
 * Returns 1 once the whole range has been handled, 0 as soon as an empty
 * p4d entry is met or gup_pud_range() returns 0.
 */
static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
			 int write, struct page **pages, int *nr)
{
	p4d_t *p4dp = p4d_offset(&pgd, addr);

	for (;;) {
		/* Operate on a local copy of the entry. */
		p4d_t p4d = *p4dp;
		unsigned long next = p4d_addr_end(addr, end);

		if (p4d_none(p4d))
			return 0;
		BUILD_BUG_ON(p4d_large(p4d));
		if (!gup_pud_range(p4d, addr, next, write, pages, nr))
			return 0;

		p4dp++;
		addr = next;
		if (addr == end)
			break;
	}

	return 1;
}
|
||||
|
||||
/*
|
||||
* Like get_user_pages_fast() except its IRQ-safe in that it won't fall
|
||||
* back to the regular GUP.
|
||||
|
@ -368,7 +389,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
|||
next = pgd_addr_end(addr, end);
|
||||
if (pgd_none(pgd))
|
||||
break;
|
||||
if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
|
||||
if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
|
||||
break;
|
||||
} while (pgdp++, addr = next, addr != end);
|
||||
local_irq_restore(flags);
|
||||
|
@ -440,7 +461,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
|||
next = pgd_addr_end(addr, end);
|
||||
if (pgd_none(pgd))
|
||||
goto slow;
|
||||
if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
|
||||
if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
|
||||
goto slow;
|
||||
} while (pgdp++, addr = next, addr != end);
|
||||
local_irq_enable();
|
||||
|
|
|
@ -12,10 +12,12 @@
|
|||
#include <linux/pagemap.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/compat.h>
|
||||
#include <asm/mman.h>
|
||||
#include <asm/tlb.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/elf.h>
|
||||
|
||||
#if 0 /* This is just for testing */
|
||||
struct page *
|
||||
|
@ -82,8 +84,9 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
|
|||
|
||||
info.flags = 0;
|
||||
info.length = len;
|
||||
info.low_limit = current->mm->mmap_legacy_base;
|
||||
info.high_limit = TASK_SIZE;
|
||||
info.low_limit = get_mmap_base(1);
|
||||
info.high_limit = in_compat_syscall() ?
|
||||
tasksize_32bit() : tasksize_64bit();
|
||||
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
|
||||
info.align_offset = 0;
|
||||
return vm_unmapped_area(&info);
|
||||
|
@ -100,7 +103,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
|
|||
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
|
||||
info.length = len;
|
||||
info.low_limit = PAGE_SIZE;
|
||||
info.high_limit = current->mm->mmap_base;
|
||||
info.high_limit = get_mmap_base(0);
|
||||
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
|
||||
info.align_offset = 0;
|
||||
addr = vm_unmapped_area(&info);
|
||||
|
|
|
@ -45,6 +45,34 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Populate the p4d table @p4d_page for the range [addr, end).
 *
 * For every p4d slot touched by the range, either reuse the pud table the
 * slot already points to or obtain a fresh one from info->alloc_pgt_page(),
 * fill it with ident_pud_init() and then install it in the slot.
 *
 * Returns 0 on success, -ENOMEM when a pud page cannot be allocated, or the
 * non-zero result of ident_pud_init() when filling a pud table fails.  The
 * p4d entry is only written after the pud table below it was set up
 * successfully.
 */
static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page,
			  unsigned long addr, unsigned long end)
{
	unsigned long next;
	int result;

	for (; addr < end; addr = next) {
		p4d_t *p4d = p4d_page + p4d_index(addr);
		pud_t *pud;

		/* Clamp this step to the end of the current p4d slot. */
		next = (addr & P4D_MASK) + P4D_SIZE;
		if (next > end)
			next = end;

		if (p4d_present(*p4d)) {
			pud = pud_offset(p4d, 0);
			result = ident_pud_init(info, pud, addr, next);
			if (result)
				return result;

			continue;
		}

		pud = (pud_t *)info->alloc_pgt_page(info->context);
		if (!pud)
			return -ENOMEM;

		/* Do not publish a pud table that could not be filled. */
		result = ident_pud_init(info, pud, addr, next);
		if (result)
			return result;

		set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
	}

	return 0;
}
|
||||
|
||||
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
|
||||
unsigned long pstart, unsigned long pend)
|
||||
{
|
||||
|
@ -55,27 +83,36 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
|
|||
|
||||
for (; addr < end; addr = next) {
|
||||
pgd_t *pgd = pgd_page + pgd_index(addr);
|
||||
pud_t *pud;
|
||||
p4d_t *p4d;
|
||||
|
||||
next = (addr & PGDIR_MASK) + PGDIR_SIZE;
|
||||
if (next > end)
|
||||
next = end;
|
||||
|
||||
if (pgd_present(*pgd)) {
|
||||
pud = pud_offset(pgd, 0);
|
||||
result = ident_pud_init(info, pud, addr, next);
|
||||
p4d = p4d_offset(pgd, 0);
|
||||
result = ident_p4d_init(info, p4d, addr, next);
|
||||
if (result)
|
||||
return result;
|
||||
continue;
|
||||
}
|
||||
|
||||
pud = (pud_t *)info->alloc_pgt_page(info->context);
|
||||
if (!pud)
|
||||
p4d = (p4d_t *)info->alloc_pgt_page(info->context);
|
||||
if (!p4d)
|
||||
return -ENOMEM;
|
||||
result = ident_pud_init(info, pud, addr, next);
|
||||
result = ident_p4d_init(info, p4d, addr, next);
|
||||
if (result)
|
||||
return result;
|
||||
set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
||||
set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
|
||||
} else {
|
||||
/*
|
||||
* With p4d folded, pgd is equal to p4d.
|
||||
* The pgd entry has to point to the pud page table in this case.
|
||||
*/
|
||||
pud_t *pud = pud_offset(p4d, 0);
|
||||
set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -56,8 +56,6 @@
|
|||
|
||||
unsigned long highstart_pfn, highend_pfn;
|
||||
|
||||
static noinline int do_test_wp_bit(void);
|
||||
|
||||
bool __read_mostly __vmalloc_start_set = false;
|
||||
|
||||
/*
|
||||
|
@ -67,6 +65,7 @@ bool __read_mostly __vmalloc_start_set = false;
|
|||
*/
|
||||
static pmd_t * __init one_md_table_init(pgd_t *pgd)
|
||||
{
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd_table;
|
||||
|
||||
|
@ -75,13 +74,15 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
|
|||
pmd_table = (pmd_t *)alloc_low_page();
|
||||
paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
|
||||
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
|
||||
pud = pud_offset(pgd, 0);
|
||||
p4d = p4d_offset(pgd, 0);
|
||||
pud = pud_offset(p4d, 0);
|
||||
BUG_ON(pmd_table != pmd_offset(pud, 0));
|
||||
|
||||
return pmd_table;
|
||||
}
|
||||
#endif
|
||||
pud = pud_offset(pgd, 0);
|
||||
p4d = p4d_offset(pgd, 0);
|
||||
pud = pud_offset(p4d, 0);
|
||||
pmd_table = pmd_offset(pud, 0);
|
||||
|
||||
return pmd_table;
|
||||
|
@ -390,8 +391,11 @@ pte_t *kmap_pte;
|
|||
|
||||
static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
|
||||
{
|
||||
return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
|
||||
vaddr), vaddr), vaddr);
|
||||
pgd_t *pgd = pgd_offset_k(vaddr);
|
||||
p4d_t *p4d = p4d_offset(pgd, vaddr);
|
||||
pud_t *pud = pud_offset(p4d, vaddr);
|
||||
pmd_t *pmd = pmd_offset(pud, vaddr);
|
||||
return pte_offset_kernel(pmd, vaddr);
|
||||
}
|
||||
|
||||
static void __init kmap_init(void)
|
||||
|
@ -410,6 +414,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
|
|||
{
|
||||
unsigned long vaddr;
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
@ -418,7 +423,8 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
|
|||
page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
|
||||
|
||||
pgd = swapper_pg_dir + pgd_index(vaddr);
|
||||
pud = pud_offset(pgd, vaddr);
|
||||
p4d = p4d_offset(pgd, vaddr);
|
||||
pud = pud_offset(p4d, vaddr);
|
||||
pmd = pmd_offset(pud, vaddr);
|
||||
pte = pte_offset_kernel(pmd, vaddr);
|
||||
pkmap_page_table = pte;
|
||||
|
@ -450,6 +456,7 @@ void __init native_pagetable_init(void)
|
|||
{
|
||||
unsigned long pfn, va;
|
||||
pgd_t *pgd, *base = swapper_pg_dir;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
@ -469,7 +476,8 @@ void __init native_pagetable_init(void)
|
|||
if (!pgd_present(*pgd))
|
||||
break;
|
||||
|
||||
pud = pud_offset(pgd, va);
|
||||
p4d = p4d_offset(pgd, va);
|
||||
pud = pud_offset(p4d, va);
|
||||
pmd = pmd_offset(pud, va);
|
||||
if (!pmd_present(*pmd))
|
||||
break;
|
||||
|
@ -716,22 +724,20 @@ void __init paging_init(void)
|
|||
*/
|
||||
static void __init test_wp_bit(void)
|
||||
{
|
||||
int wp_works_ok;
|
||||
char z = 0;
|
||||
|
||||
printk(KERN_INFO
|
||||
"Checking if this processor honours the WP bit even in supervisor mode...");
|
||||
printk(KERN_INFO "Checking if this processor honours the WP bit even in supervisor mode...");
|
||||
|
||||
/* Any page-aligned address will do, the test is non-destructive */
|
||||
__set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_KERNEL_RO);
|
||||
wp_works_ok = do_test_wp_bit();
|
||||
clear_fixmap(FIX_WP_TEST);
|
||||
__set_fixmap(FIX_WP_TEST, __pa_symbol(empty_zero_page), PAGE_KERNEL_RO);
|
||||
|
||||
if (!wp_works_ok) {
|
||||
printk(KERN_CONT "No.\n");
|
||||
panic("Linux doesn't support CPUs with broken WP.");
|
||||
} else {
|
||||
if (probe_kernel_write((char *)fix_to_virt(FIX_WP_TEST), &z, 1)) {
|
||||
clear_fixmap(FIX_WP_TEST);
|
||||
printk(KERN_CONT "Ok.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
printk(KERN_CONT "No.\n");
|
||||
panic("Linux doesn't support CPUs with broken WP.");
|
||||
}
|
||||
|
||||
void __init mem_init(void)
|
||||
|
@ -841,30 +847,6 @@ int arch_remove_memory(u64 start, u64 size)
|
|||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This function cannot be __init, since exceptions don't work in that
|
||||
* section. Put this after the callers, so that it cannot be inlined.
|
||||
*/
|
||||
static noinline int do_test_wp_bit(void)
|
||||
{
|
||||
char tmp_reg;
|
||||
int flag;
|
||||
|
||||
__asm__ __volatile__(
|
||||
" movb %0, %1 \n"
|
||||
"1: movb %1, %0 \n"
|
||||
" xorl %2, %2 \n"
|
||||
"2: \n"
|
||||
_ASM_EXTABLE(1b,2b)
|
||||
:"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
|
||||
"=q" (tmp_reg),
|
||||
"=r" (flag)
|
||||
:"2" (1)
|
||||
:"memory");
|
||||
|
||||
return flag;
|
||||
}
|
||||
|
||||
int kernel_set_to_readonly __read_mostly;
|
||||
|
||||
void set_kernel_text_rw(void)
|
||||
|
|
|
@ -97,28 +97,38 @@ void sync_global_pgds(unsigned long start, unsigned long end)
|
|||
unsigned long address;
|
||||
|
||||
for (address = start; address <= end; address += PGDIR_SIZE) {
|
||||
const pgd_t *pgd_ref = pgd_offset_k(address);
|
||||
pgd_t *pgd_ref = pgd_offset_k(address);
|
||||
const p4d_t *p4d_ref;
|
||||
struct page *page;
|
||||
|
||||
if (pgd_none(*pgd_ref))
|
||||
/*
|
||||
* With folded p4d, pgd_none() is always false, we need to
|
||||
* handle synchronization on p4d level.
|
||||
*/
|
||||
BUILD_BUG_ON(pgd_none(*pgd_ref));
|
||||
p4d_ref = p4d_offset(pgd_ref, address);
|
||||
|
||||
if (p4d_none(*p4d_ref))
|
||||
continue;
|
||||
|
||||
spin_lock(&pgd_lock);
|
||||
list_for_each_entry(page, &pgd_list, lru) {
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
spinlock_t *pgt_lock;
|
||||
|
||||
pgd = (pgd_t *)page_address(page) + pgd_index(address);
|
||||
p4d = p4d_offset(pgd, address);
|
||||
/* the pgt_lock only for Xen */
|
||||
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
|
||||
spin_lock(pgt_lock);
|
||||
|
||||
if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
|
||||
BUG_ON(pgd_page_vaddr(*pgd)
|
||||
!= pgd_page_vaddr(*pgd_ref));
|
||||
if (!p4d_none(*p4d_ref) && !p4d_none(*p4d))
|
||||
BUG_ON(p4d_page_vaddr(*p4d)
|
||||
!= p4d_page_vaddr(*p4d_ref));
|
||||
|
||||
if (pgd_none(*pgd))
|
||||
set_pgd(pgd, *pgd_ref);
|
||||
if (p4d_none(*p4d))
|
||||
set_p4d(p4d, *p4d_ref);
|
||||
|
||||
spin_unlock(pgt_lock);
|
||||
}
|
||||
|
@ -149,16 +159,28 @@ static __ref void *spp_getpage(void)
|
|||
return ptr;
|
||||
}
|
||||
|
||||
static pud_t *fill_pud(pgd_t *pgd, unsigned long vaddr)
|
||||
static p4d_t *fill_p4d(pgd_t *pgd, unsigned long vaddr)
|
||||
{
|
||||
if (pgd_none(*pgd)) {
|
||||
pud_t *pud = (pud_t *)spp_getpage();
|
||||
pgd_populate(&init_mm, pgd, pud);
|
||||
if (pud != pud_offset(pgd, 0))
|
||||
p4d_t *p4d = (p4d_t *)spp_getpage();
|
||||
pgd_populate(&init_mm, pgd, p4d);
|
||||
if (p4d != p4d_offset(pgd, 0))
|
||||
printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n",
|
||||
pud, pud_offset(pgd, 0));
|
||||
p4d, p4d_offset(pgd, 0));
|
||||
}
|
||||
return pud_offset(pgd, vaddr);
|
||||
return p4d_offset(pgd, vaddr);
|
||||
}
|
||||
|
||||
/*
 * Return the pud entry for @vaddr below @p4d.  When the p4d entry is empty,
 * a page from spp_getpage() is first installed as its pud table; a mismatch
 * between that page and what pud_offset() then resolves to is logged.
 */
static pud_t *fill_pud(p4d_t *p4d, unsigned long vaddr)
{
	pud_t *new_pud;

	/* Already populated: just look the entry up. */
	if (!p4d_none(*p4d))
		return pud_offset(p4d, vaddr);

	new_pud = (pud_t *)spp_getpage();
	p4d_populate(&init_mm, p4d, new_pud);
	if (new_pud != pud_offset(p4d, 0))
		printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
		       new_pud, pud_offset(p4d, 0));

	return pud_offset(p4d, vaddr);
}
|
||||
|
||||
static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
|
||||
|
@ -167,7 +189,7 @@ static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
|
|||
pmd_t *pmd = (pmd_t *) spp_getpage();
|
||||
pud_populate(&init_mm, pud, pmd);
|
||||
if (pmd != pmd_offset(pud, 0))
|
||||
printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
|
||||
printk(KERN_ERR "PAGETABLE BUG #02! %p <-> %p\n",
|
||||
pmd, pmd_offset(pud, 0));
|
||||
}
|
||||
return pmd_offset(pud, vaddr);
|
||||
|
@ -179,20 +201,15 @@ static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr)
|
|||
pte_t *pte = (pte_t *) spp_getpage();
|
||||
pmd_populate_kernel(&init_mm, pmd, pte);
|
||||
if (pte != pte_offset_kernel(pmd, 0))
|
||||
printk(KERN_ERR "PAGETABLE BUG #02!\n");
|
||||
printk(KERN_ERR "PAGETABLE BUG #03!\n");
|
||||
}
|
||||
return pte_offset_kernel(pmd, vaddr);
|
||||
}
|
||||
|
||||
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
|
||||
static void __set_pte_vaddr(pud_t *pud, unsigned long vaddr, pte_t new_pte)
|
||||
{
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
||||
pud = pud_page + pud_index(vaddr);
|
||||
pmd = fill_pmd(pud, vaddr);
|
||||
pte = fill_pte(pmd, vaddr);
|
||||
pmd_t *pmd = fill_pmd(pud, vaddr);
|
||||
pte_t *pte = fill_pte(pmd, vaddr);
|
||||
|
||||
set_pte(pte, new_pte);
|
||||
|
||||
|
@ -203,10 +220,25 @@ void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
|
|||
__flush_tlb_one(vaddr);
|
||||
}
|
||||
|
||||
/*
 * Set the pte for @vaddr to @new_pte, starting the walk at the p4d table
 * @p4d_page; the pud level is obtained through fill_pud().
 */
void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte)
{
	p4d_t *slot = p4d_page + p4d_index(vaddr);

	__set_pte_vaddr(fill_pud(slot, vaddr), vaddr, new_pte);
}
|
||||
|
||||
/*
 * Set the pte for @vaddr to @new_pte, starting the walk at the pud table
 * @pud_page.
 */
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
{
	__set_pte_vaddr(pud_page + pud_index(vaddr), vaddr, new_pte);
}
|
||||
|
||||
void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud_page;
|
||||
p4d_t *p4d_page;
|
||||
|
||||
pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval));
|
||||
|
||||
|
@ -216,17 +248,20 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
|
|||
"PGD FIXMAP MISSING, it should be setup in head.S!\n");
|
||||
return;
|
||||
}
|
||||
pud_page = (pud_t*)pgd_page_vaddr(*pgd);
|
||||
set_pte_vaddr_pud(pud_page, vaddr, pteval);
|
||||
|
||||
p4d_page = p4d_offset(pgd, 0);
|
||||
set_pte_vaddr_p4d(p4d_page, vaddr, pteval);
|
||||
}
|
||||
|
||||
pmd_t * __init populate_extra_pmd(unsigned long vaddr)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
|
||||
pgd = pgd_offset_k(vaddr);
|
||||
pud = fill_pud(pgd, vaddr);
|
||||
p4d = fill_p4d(pgd, vaddr);
|
||||
pud = fill_pud(p4d, vaddr);
|
||||
return fill_pmd(pud, vaddr);
|
||||
}
|
||||
|
||||
|
@ -245,6 +280,7 @@ static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
|
|||
enum page_cache_mode cache)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pgprot_t prot;
|
||||
|
@ -255,11 +291,17 @@ static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
|
|||
for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
|
||||
pgd = pgd_offset_k((unsigned long)__va(phys));
|
||||
if (pgd_none(*pgd)) {
|
||||
pud = (pud_t *) spp_getpage();
|
||||
set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE |
|
||||
p4d = (p4d_t *) spp_getpage();
|
||||
set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE |
|
||||
_PAGE_USER));
|
||||
}
|
||||
pud = pud_offset(pgd, (unsigned long)__va(phys));
|
||||
p4d = p4d_offset(pgd, (unsigned long)__va(phys));
|
||||
if (p4d_none(*p4d)) {
|
||||
pud = (pud_t *) spp_getpage();
|
||||
set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE |
|
||||
_PAGE_USER));
|
||||
}
|
||||
pud = pud_offset(p4d, (unsigned long)__va(phys));
|
||||
if (pud_none(*pud)) {
|
||||
pmd = (pmd_t *) spp_getpage();
|
||||
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
|
||||
|
@ -563,12 +605,15 @@ kernel_physical_mapping_init(unsigned long paddr_start,
|
|||
|
||||
for (; vaddr < vaddr_end; vaddr = vaddr_next) {
|
||||
pgd_t *pgd = pgd_offset_k(vaddr);
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
|
||||
vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE;
|
||||
|
||||
if (pgd_val(*pgd)) {
|
||||
pud = (pud_t *)pgd_page_vaddr(*pgd);
|
||||
BUILD_BUG_ON(pgd_none(*pgd));
|
||||
p4d = p4d_offset(pgd, vaddr);
|
||||
if (p4d_val(*p4d)) {
|
||||
pud = (pud_t *)p4d_page_vaddr(*p4d);
|
||||
paddr_last = phys_pud_init(pud, __pa(vaddr),
|
||||
__pa(vaddr_end),
|
||||
page_size_mask);
|
||||
|
@ -580,7 +625,7 @@ kernel_physical_mapping_init(unsigned long paddr_start,
|
|||
page_size_mask);
|
||||
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
pgd_populate(&init_mm, pgd, pud);
|
||||
p4d_populate(&init_mm, p4d, pud);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
pgd_changed = true;
|
||||
}
|
||||
|
@ -726,6 +771,24 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
|
|||
spin_unlock(&init_mm.page_table_lock);
|
||||
}
|
||||
|
||||
/*
 * Free the pud table @pud_start referenced by @p4d, but only when every one
 * of its PTRS_PER_PUD entries is empty.  The page is released first and the
 * p4d entry is then cleared under init_mm.page_table_lock.
 */
static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
{
	pud_t *entry = pud_start;
	pud_t *limit = pud_start + PTRS_PER_PUD;

	/* Any entry still in use keeps the whole table alive. */
	while (entry < limit) {
		if (!pud_none(*entry))
			return;
		entry++;
	}

	/* free a pud table */
	free_pagetable(p4d_page(*p4d), 0);

	spin_lock(&init_mm.page_table_lock);
	p4d_clear(p4d);
	spin_unlock(&init_mm.page_table_lock);
}
|
||||
|
||||
static void __meminit
|
||||
remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
|
||||
bool direct)
|
||||
|
@ -899,7 +962,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
|
|||
continue;
|
||||
}
|
||||
|
||||
pmd_base = (pmd_t *)pud_page_vaddr(*pud);
|
||||
pmd_base = pmd_offset(pud, 0);
|
||||
remove_pmd_table(pmd_base, addr, next, direct);
|
||||
free_pmd_table(pmd_base, pud);
|
||||
}
|
||||
|
@ -908,6 +971,32 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
|
|||
update_page_count(PG_LEVEL_1G, -pages);
|
||||
}
|
||||
|
||||
/*
 * remove_p4d_table - tear down the PUD tables behind a range of P4D entries.
 *
 * Starting at the entry of @p4d_start selected by p4d_index(@addr), walks
 * the P4D entries covering [@addr, @end). Entries that are not present are
 * skipped; for every other entry the PUD table is passed to
 * remove_pud_table() and then to free_pud_table(). @direct is forwarded to
 * remove_pud_table() and, when true, also triggers the PG_LEVEL_512G
 * page-count update after the loop.
 *
 * NOTE(review): 'pages' is initialised to 0 and never changed in this
 * function, so the update_page_count() call below always passes 0.
 */
static void __meminit
|
||||
remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
|
||||
bool direct)
|
||||
{
|
||||
unsigned long next, pages = 0;
|
||||
pud_t *pud_base;
|
||||
p4d_t *p4d;
|
||||
|
||||
p4d = p4d_start + p4d_index(addr);
|
||||
for (; addr < end; addr = next, p4d++) {
|
||||
/* end of the range covered by this P4D entry, capped at @end */
next = p4d_addr_end(addr, end);
|
||||
|
||||
if (!p4d_present(*p4d))
|
||||
continue;
|
||||
|
||||
/*
 * NOTE(review): this only builds if p4d_large() is a compile-time
 * false here -- presumably no large P4D mappings exist; confirm.
 */
BUILD_BUG_ON(p4d_large(*p4d));
|
||||
|
||||
pud_base = pud_offset(p4d, 0);
|
||||
remove_pud_table(pud_base, addr, next, direct);
|
||||
/* frees the PUD table only if remove_pud_table() left it empty */
free_pud_table(pud_base, p4d);
|
||||
}
|
||||
|
||||
if (direct)
|
||||
update_page_count(PG_LEVEL_512G, -pages);
|
||||
}
|
||||
|
||||
/* start and end are both virtual address. */
|
||||
static void __meminit
|
||||
remove_pagetable(unsigned long start, unsigned long end, bool direct)
|
||||
|
@ -915,7 +1004,7 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
|
|||
unsigned long next;
|
||||
unsigned long addr;
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
p4d_t *p4d;
|
||||
|
||||
for (addr = start; addr < end; addr = next) {
|
||||
next = pgd_addr_end(addr, end);
|
||||
|
@ -924,8 +1013,8 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
|
|||
if (!pgd_present(*pgd))
|
||||
continue;
|
||||
|
||||
pud = (pud_t *)pgd_page_vaddr(*pgd);
|
||||
remove_pud_table(pud, addr, next, direct);
|
||||
p4d = p4d_offset(pgd, 0);
|
||||
remove_p4d_table(p4d, addr, next, direct);
|
||||
}
|
||||
|
||||
flush_tlb_all();
|
||||
|
@ -1090,6 +1179,7 @@ int kern_addr_valid(unsigned long addr)
|
|||
{
|
||||
unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
@ -1101,7 +1191,11 @@ int kern_addr_valid(unsigned long addr)
|
|||
if (pgd_none(*pgd))
|
||||
return 0;
|
||||
|
||||
pud = pud_offset(pgd, addr);
|
||||
p4d = p4d_offset(pgd, addr);
|
||||
if (p4d_none(*p4d))
|
||||
return 0;
|
||||
|
||||
pud = pud_offset(p4d, addr);
|
||||
if (pud_none(*pud))
|
||||
return 0;
|
||||
|
||||
|
@ -1158,6 +1252,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
|
|||
unsigned long addr;
|
||||
unsigned long next;
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
|
||||
|
@ -1168,7 +1263,11 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
|
|||
if (!pgd)
|
||||
return -ENOMEM;
|
||||
|
||||
pud = vmemmap_pud_populate(pgd, addr, node);
|
||||
p4d = vmemmap_p4d_populate(pgd, addr, node);
|
||||
if (!p4d)
|
||||
return -ENOMEM;
|
||||
|
||||
pud = vmemmap_pud_populate(p4d, addr, node);
|
||||
if (!pud)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -1236,6 +1335,7 @@ void register_page_bootmem_memmap(unsigned long section_nr,
|
|||
unsigned long end = (unsigned long)(start_page + size);
|
||||
unsigned long next;
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
unsigned int nr_pages;
|
||||
|
@ -1251,7 +1351,14 @@ void register_page_bootmem_memmap(unsigned long section_nr,
|
|||
}
|
||||
get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
|
||||
|
||||
pud = pud_offset(pgd, addr);
|
||||
p4d = p4d_offset(pgd, addr);
|
||||
if (p4d_none(*p4d)) {
|
||||
next = (addr + PAGE_SIZE) & PAGE_MASK;
|
||||
continue;
|
||||
}
|
||||
get_page_bootmem(section_nr, p4d_page(*p4d), MIX_SECTION_INFO);
|
||||
|
||||
pud = pud_offset(p4d, addr);
|
||||
if (pud_none(*pud)) {
|
||||
next = (addr + PAGE_SIZE) & PAGE_MASK;
|
||||
continue;
|
||||
|
|
|
@ -426,7 +426,8 @@ static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
|
|||
/* Don't assume we're using swapper_pg_dir at this point */
|
||||
pgd_t *base = __va(read_cr3());
|
||||
pgd_t *pgd = &base[pgd_index(addr)];
|
||||
pud_t *pud = pud_offset(pgd, addr);
|
||||
p4d_t *p4d = p4d_offset(pgd, addr);
|
||||
pud_t *pud = pud_offset(p4d, addr);
|
||||
pmd_t *pmd = pmd_offset(pud, addr);
|
||||
|
||||
return pmd;
|
||||
|
|
|
@ -34,8 +34,19 @@ static int __init map_range(struct range *range)
|
|||
static void __init clear_pgds(unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
for (; start < end; start += PGDIR_SIZE)
|
||||
pgd_clear(pgd_offset_k(start));
|
||||
pgd_t *pgd;
|
||||
|
||||
for (; start < end; start += PGDIR_SIZE) {
|
||||
pgd = pgd_offset_k(start);
|
||||
/*
|
||||
* With folded p4d, pgd_clear() is nop, use p4d_clear()
|
||||
* instead.
|
||||
*/
|
||||
if (CONFIG_PGTABLE_LEVELS < 5)
|
||||
p4d_clear(p4d_offset(pgd, start));
|
||||
else
|
||||
pgd_clear(pgd);
|
||||
}
|
||||
}
|
||||
|
||||
static void __init kasan_map_early_shadow(pgd_t *pgd)
|
||||
|
@ -45,8 +56,18 @@ static void __init kasan_map_early_shadow(pgd_t *pgd)
|
|||
unsigned long end = KASAN_SHADOW_END;
|
||||
|
||||
for (i = pgd_index(start); start < end; i++) {
|
||||
pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud)
|
||||
| _KERNPG_TABLE);
|
||||
switch (CONFIG_PGTABLE_LEVELS) {
|
||||
case 4:
|
||||
pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) |
|
||||
_KERNPG_TABLE);
|
||||
break;
|
||||
case 5:
|
||||
pgd[i] = __pgd(__pa_nodebug(kasan_zero_p4d) |
|
||||
_KERNPG_TABLE);
|
||||
break;
|
||||
default:
|
||||
BUILD_BUG();
|
||||
}
|
||||
start += PGDIR_SIZE;
|
||||
}
|
||||
}
|
||||
|
@ -74,6 +95,7 @@ void __init kasan_early_init(void)
|
|||
pteval_t pte_val = __pa_nodebug(kasan_zero_page) | __PAGE_KERNEL;
|
||||
pmdval_t pmd_val = __pa_nodebug(kasan_zero_pte) | _KERNPG_TABLE;
|
||||
pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE;
|
||||
p4dval_t p4d_val = __pa_nodebug(kasan_zero_pud) | _KERNPG_TABLE;
|
||||
|
||||
for (i = 0; i < PTRS_PER_PTE; i++)
|
||||
kasan_zero_pte[i] = __pte(pte_val);
|
||||
|
@ -84,6 +106,9 @@ void __init kasan_early_init(void)
|
|||
for (i = 0; i < PTRS_PER_PUD; i++)
|
||||
kasan_zero_pud[i] = __pud(pud_val);
|
||||
|
||||
for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++)
|
||||
kasan_zero_p4d[i] = __p4d(p4d_val);
|
||||
|
||||
kasan_map_early_shadow(early_level4_pgt);
|
||||
kasan_map_early_shadow(init_level4_pgt);
|
||||
}
|
||||
|
|
|
@ -30,30 +30,44 @@
|
|||
#include <linux/limits.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/compat.h>
|
||||
#include <asm/elf.h>
|
||||
|
||||
struct va_alignment __read_mostly va_align = {
|
||||
.flags = -1,
|
||||
};
|
||||
|
||||
static unsigned long stack_maxrandom_size(void)
|
||||
/*
 * tasksize_32bit - task size used for the 32-bit (compat) layout.
 *
 * Returns IA32_PAGE_OFFSET. In this file the value is compared against in
 * stack_maxrandom_size() and passed as the task size when the compat mmap
 * bases are computed.
 */
unsigned long tasksize_32bit(void)
|
||||
{
|
||||
return IA32_PAGE_OFFSET;
|
||||
}
|
||||
|
||||
/*
 * tasksize_64bit - task size used for the native 64-bit layout.
 *
 * Returns TASK_SIZE_MAX. In this file it is passed as the task size when
 * mm->mmap_base and mm->mmap_legacy_base are computed.
 */
unsigned long tasksize_64bit(void)
|
||||
{
|
||||
return TASK_SIZE_MAX;
|
||||
}
|
||||
|
||||
static unsigned long stack_maxrandom_size(unsigned long task_size)
|
||||
{
|
||||
unsigned long max = 0;
|
||||
if ((current->flags & PF_RANDOMIZE) &&
|
||||
!(current->personality & ADDR_NO_RANDOMIZE)) {
|
||||
max = ((-1UL) & STACK_RND_MASK) << PAGE_SHIFT;
|
||||
max = (-1UL) & __STACK_RND_MASK(task_size == tasksize_32bit());
|
||||
max <<= PAGE_SHIFT;
|
||||
}
|
||||
|
||||
return max;
|
||||
}
|
||||
|
||||
/*
|
||||
* Top of mmap area (just below the process stack).
|
||||
*
|
||||
* Leave an at least ~128 MB hole with possible stack randomization.
|
||||
*/
|
||||
#define MIN_GAP (128*1024*1024UL + stack_maxrandom_size())
|
||||
#define MAX_GAP (TASK_SIZE/6*5)
|
||||
#ifdef CONFIG_COMPAT
|
||||
# define mmap32_rnd_bits mmap_rnd_compat_bits
|
||||
# define mmap64_rnd_bits mmap_rnd_bits
|
||||
#else
|
||||
# define mmap32_rnd_bits mmap_rnd_bits
|
||||
# define mmap64_rnd_bits mmap_rnd_bits
|
||||
#endif
|
||||
|
||||
#define SIZE_128M (128 * 1024 * 1024UL)
|
||||
|
||||
static int mmap_is_legacy(void)
|
||||
{
|
||||
|
@ -66,54 +80,91 @@ static int mmap_is_legacy(void)
|
|||
return sysctl_legacy_va_layout;
|
||||
}
|
||||
|
||||
unsigned long arch_mmap_rnd(void)
|
||||
static unsigned long arch_rnd(unsigned int rndbits)
|
||||
{
|
||||
unsigned long rnd;
|
||||
|
||||
if (mmap_is_ia32())
|
||||
#ifdef CONFIG_COMPAT
|
||||
rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
|
||||
#else
|
||||
rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
|
||||
#endif
|
||||
else
|
||||
rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
|
||||
|
||||
return rnd << PAGE_SHIFT;
|
||||
return (get_random_long() & ((1UL << rndbits) - 1)) << PAGE_SHIFT;
|
||||
}
|
||||
|
||||
static unsigned long mmap_base(unsigned long rnd)
|
||||
unsigned long arch_mmap_rnd(void)
|
||||
{
|
||||
if (!(current->flags & PF_RANDOMIZE))
|
||||
return 0;
|
||||
return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits);
|
||||
}
|
||||
|
||||
static unsigned long mmap_base(unsigned long rnd, unsigned long task_size)
|
||||
{
|
||||
unsigned long gap = rlimit(RLIMIT_STACK);
|
||||
unsigned long gap_min, gap_max;
|
||||
|
||||
if (gap < MIN_GAP)
|
||||
gap = MIN_GAP;
|
||||
else if (gap > MAX_GAP)
|
||||
gap = MAX_GAP;
|
||||
/*
|
||||
* Top of mmap area (just below the process stack).
|
||||
* Leave an at least ~128 MB hole with possible stack randomization.
|
||||
*/
|
||||
gap_min = SIZE_128M + stack_maxrandom_size(task_size);
|
||||
gap_max = (task_size / 6) * 5;
|
||||
|
||||
return PAGE_ALIGN(TASK_SIZE - gap - rnd);
|
||||
if (gap < gap_min)
|
||||
gap = gap_min;
|
||||
else if (gap > gap_max)
|
||||
gap = gap_max;
|
||||
|
||||
return PAGE_ALIGN(task_size - gap - rnd);
|
||||
}
|
||||
|
||||
/*
 * mmap_legacy_base - base address for the legacy mmap layout.
 *
 * Returns __TASK_UNMAPPED_BASE(@task_size) plus the random offset @rnd.
 * No alignment or range check is applied here.
 */
static unsigned long mmap_legacy_base(unsigned long rnd,
|
||||
unsigned long task_size)
|
||||
{
|
||||
return __TASK_UNMAPPED_BASE(task_size) + rnd;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function, called very early during the creation of a new
|
||||
* process VM image, sets up which VM layout function to use:
|
||||
*/
|
||||
/*
 * arch_pick_mmap_base - compute one pair of mmap bases for a given task size.
 *
 * @base:          out: mmap base to use for the current layout
 * @legacy_base:   out: legacy-layout base, always written
 * @random_factor: random offset forwarded to the base helpers
 * @task_size:     address-space size the bases are derived from
 *
 * *@legacy_base is always set from mmap_legacy_base(). *@base is a copy of
 * it when mmap_is_legacy() is true, otherwise the result of mmap_base().
 */
static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base,
|
||||
unsigned long random_factor, unsigned long task_size)
|
||||
{
|
||||
*legacy_base = mmap_legacy_base(random_factor, task_size);
|
||||
if (mmap_is_legacy())
|
||||
*base = *legacy_base;
|
||||
else
|
||||
*base = mmap_base(random_factor, task_size);
|
||||
}
|
||||
|
||||
void arch_pick_mmap_layout(struct mm_struct *mm)
|
||||
{
|
||||
unsigned long random_factor = 0UL;
|
||||
|
||||
if (current->flags & PF_RANDOMIZE)
|
||||
random_factor = arch_mmap_rnd();
|
||||
|
||||
mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor;
|
||||
|
||||
if (mmap_is_legacy()) {
|
||||
mm->mmap_base = mm->mmap_legacy_base;
|
||||
if (mmap_is_legacy())
|
||||
mm->get_unmapped_area = arch_get_unmapped_area;
|
||||
} else {
|
||||
mm->mmap_base = mmap_base(random_factor);
|
||||
else
|
||||
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
|
||||
|
||||
arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
|
||||
arch_rnd(mmap64_rnd_bits), tasksize_64bit());
|
||||
|
||||
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
|
||||
/*
|
||||
* The mmap syscall mapping base decision depends solely on the
|
||||
* syscall type (64-bit or compat). This applies for 64bit
|
||||
* applications and 32bit applications. The 64bit syscall uses
|
||||
* mmap_base, the compat syscall uses mmap_compat_base.
|
||||
*/
|
||||
arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base,
|
||||
arch_rnd(mmap32_rnd_bits), tasksize_32bit());
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * get_mmap_base - pick the mmap base of current->mm for the running syscall.
 *
 * @is_legacy: non-zero selects the legacy base, zero the regular base.
 *
 * When CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES is defined and in_compat_syscall()
 * is true, the compat pair (mmap_compat_legacy_base / mmap_compat_base) is
 * used; in every other case the native pair (mmap_legacy_base / mmap_base).
 */
unsigned long get_mmap_base(int is_legacy)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
|
||||
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
|
||||
if (in_compat_syscall()) {
|
||||
return is_legacy ? mm->mmap_compat_legacy_base
|
||||
: mm->mmap_compat_base;
|
||||
}
|
||||
#endif
|
||||
return is_legacy ? mm->mmap_legacy_base : mm->mmap_base;
|
||||
}
|
||||
|
||||
const char *arch_vma_name(struct vm_area_struct *vma)
|
||||
|
|
|
@ -526,15 +526,7 @@ int mpx_handle_bd_fault(void)
|
|||
if (!kernel_managing_mpx_tables(current->mm))
|
||||
return -EINVAL;
|
||||
|
||||
if (do_mpx_bt_fault()) {
|
||||
force_sig(SIGSEGV, current);
|
||||
/*
|
||||
* The force_sig() is essentially "handling" this
|
||||
* exception, so we do not pass up the error
|
||||
* from do_mpx_bt_fault().
|
||||
*/
|
||||
}
|
||||
return 0;
|
||||
return do_mpx_bt_fault();
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -201,7 +201,7 @@ static void __init alloc_node_data(int nid)
|
|||
nd_pa = __memblock_alloc_base(nd_size, SMP_CACHE_BYTES,
|
||||
MEMBLOCK_ALLOC_ACCESSIBLE);
|
||||
if (!nd_pa) {
|
||||
pr_err("Cannot find %zu bytes in node %d\n",
|
||||
pr_err("Cannot find %zu bytes in any node (initial node: %d)\n",
|
||||
nd_size, nid);
|
||||
return;
|
||||
}
|
||||
|
@ -225,7 +225,7 @@ static void __init alloc_node_data(int nid)
|
|||
* numa_cleanup_meminfo - Cleanup a numa_meminfo
|
||||
* @mi: numa_meminfo to clean up
|
||||
*
|
||||
* Sanitize @mi by merging and removing unncessary memblks. Also check for
|
||||
* Sanitize @mi by merging and removing unnecessary memblks. Also check for
|
||||
* conflicts and clear unused memblks.
|
||||
*
|
||||
* RETURNS:
|
||||
|
|
|
@ -346,6 +346,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
|
|||
pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
|
||||
unsigned int *level)
|
||||
{
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
|
||||
|
@ -354,7 +355,15 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
|
|||
if (pgd_none(*pgd))
|
||||
return NULL;
|
||||
|
||||
pud = pud_offset(pgd, address);
|
||||
p4d = p4d_offset(pgd, address);
|
||||
if (p4d_none(*p4d))
|
||||
return NULL;
|
||||
|
||||
*level = PG_LEVEL_512G;
|
||||
if (p4d_large(*p4d) || !p4d_present(*p4d))
|
||||
return (pte_t *)p4d;
|
||||
|
||||
pud = pud_offset(p4d, address);
|
||||
if (pud_none(*pud))
|
||||
return NULL;
|
||||
|
||||
|
@ -406,13 +415,18 @@ static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
|
|||
pmd_t *lookup_pmd_address(unsigned long address)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
|
||||
pgd = pgd_offset_k(address);
|
||||
if (pgd_none(*pgd))
|
||||
return NULL;
|
||||
|
||||
pud = pud_offset(pgd, address);
|
||||
p4d = p4d_offset(pgd, address);
|
||||
if (p4d_none(*p4d) || p4d_large(*p4d) || !p4d_present(*p4d))
|
||||
return NULL;
|
||||
|
||||
pud = pud_offset(p4d, address);
|
||||
if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud))
|
||||
return NULL;
|
||||
|
||||
|
@ -477,11 +491,13 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
|
|||
|
||||
list_for_each_entry(page, &pgd_list, lru) {
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
|
||||
pgd = (pgd_t *)page_address(page) + pgd_index(address);
|
||||
pud = pud_offset(pgd, address);
|
||||
p4d = p4d_offset(pgd, address);
|
||||
pud = pud_offset(p4d, address);
|
||||
pmd = pmd_offset(pud, address);
|
||||
set_pte_atomic((pte_t *)pmd, pte);
|
||||
}
|
||||
|
@ -836,9 +852,9 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
|
|||
pud_clear(pud);
|
||||
}
|
||||
|
||||
static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
|
||||
static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end)
|
||||
{
|
||||
pud_t *pud = pud_offset(pgd, start);
|
||||
pud_t *pud = pud_offset(p4d, start);
|
||||
|
||||
/*
|
||||
* Not on a GB page boundary?
|
||||
|
@ -1004,8 +1020,8 @@ static long populate_pmd(struct cpa_data *cpa,
|
|||
return num_pages;
|
||||
}
|
||||
|
||||
static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
|
||||
pgprot_t pgprot)
|
||||
static int populate_pud(struct cpa_data *cpa, unsigned long start, p4d_t *p4d,
|
||||
pgprot_t pgprot)
|
||||
{
|
||||
pud_t *pud;
|
||||
unsigned long end;
|
||||
|
@ -1026,7 +1042,7 @@ static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
|
|||
cur_pages = (pre_end - start) >> PAGE_SHIFT;
|
||||
cur_pages = min_t(int, (int)cpa->numpages, cur_pages);
|
||||
|
||||
pud = pud_offset(pgd, start);
|
||||
pud = pud_offset(p4d, start);
|
||||
|
||||
/*
|
||||
* Need a PMD page?
|
||||
|
@ -1047,7 +1063,7 @@ static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
|
|||
if (cpa->numpages == cur_pages)
|
||||
return cur_pages;
|
||||
|
||||
pud = pud_offset(pgd, start);
|
||||
pud = pud_offset(p4d, start);
|
||||
pud_pgprot = pgprot_4k_2_large(pgprot);
|
||||
|
||||
/*
|
||||
|
@ -1067,7 +1083,7 @@ static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
|
|||
if (start < end) {
|
||||
long tmp;
|
||||
|
||||
pud = pud_offset(pgd, start);
|
||||
pud = pud_offset(p4d, start);
|
||||
if (pud_none(*pud))
|
||||
if (alloc_pmd_page(pud))
|
||||
return -1;
|
||||
|
@ -1090,33 +1106,43 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
|
|||
{
|
||||
pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
|
||||
pud_t *pud = NULL; /* shut up gcc */
|
||||
p4d_t *p4d;
|
||||
pgd_t *pgd_entry;
|
||||
long ret;
|
||||
|
||||
pgd_entry = cpa->pgd + pgd_index(addr);
|
||||
|
||||
if (pgd_none(*pgd_entry)) {
|
||||
p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
|
||||
if (!p4d)
|
||||
return -1;
|
||||
|
||||
set_pgd(pgd_entry, __pgd(__pa(p4d) | _KERNPG_TABLE));
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a PUD page and hand it down for mapping.
|
||||
*/
|
||||
if (pgd_none(*pgd_entry)) {
|
||||
p4d = p4d_offset(pgd_entry, addr);
|
||||
if (p4d_none(*p4d)) {
|
||||
pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
|
||||
if (!pud)
|
||||
return -1;
|
||||
|
||||
set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
|
||||
set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
|
||||
}
|
||||
|
||||
pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
|
||||
pgprot_val(pgprot) |= pgprot_val(cpa->mask_set);
|
||||
|
||||
ret = populate_pud(cpa, addr, pgd_entry, pgprot);
|
||||
ret = populate_pud(cpa, addr, p4d, pgprot);
|
||||
if (ret < 0) {
|
||||
/*
|
||||
* Leave the PUD page in place in case some other CPU or thread
|
||||
* already found it, but remove any useless entries we just
|
||||
* added to it.
|
||||
*/
|
||||
unmap_pud_range(pgd_entry, addr,
|
||||
unmap_pud_range(p4d, addr,
|
||||
addr + (cpa->numpages << PAGE_SHIFT));
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -81,6 +81,14 @@ void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
|
|||
paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
|
||||
tlb_remove_page(tlb, virt_to_page(pud));
|
||||
}
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 4
|
||||
/*
 * ___p4d_free_tlb - give up a P4D table page through the mmu_gather @tlb.
 *
 * Calls paravirt_release_p4d() with the physical address of @p4d shifted
 * right by PAGE_SHIFT, then queues the backing page on @tlb with
 * tlb_remove_page(). Same two-step shape as ___pud_free_tlb() above.
 */
void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
|
||||
{
|
||||
paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT);
|
||||
tlb_remove_page(tlb, virt_to_page(p4d));
|
||||
}
|
||||
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
|
||||
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
|
||||
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
|
||||
|
||||
|
@ -120,7 +128,7 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
|
|||
references from swapper_pg_dir. */
|
||||
if (CONFIG_PGTABLE_LEVELS == 2 ||
|
||||
(CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
|
||||
CONFIG_PGTABLE_LEVELS == 4) {
|
||||
CONFIG_PGTABLE_LEVELS >= 4) {
|
||||
clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
|
||||
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
|
||||
KERNEL_PGD_PTRS);
|
||||
|
@ -261,13 +269,15 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
|
|||
|
||||
static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
|
||||
{
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
int i;
|
||||
|
||||
if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
|
||||
return;
|
||||
|
||||
pud = pud_offset(pgd, 0);
|
||||
p4d = p4d_offset(pgd, 0);
|
||||
pud = pud_offset(p4d, 0);
|
||||
|
||||
for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) {
|
||||
pmd_t *pmd = pmds[i];
|
||||
|
@ -580,6 +590,28 @@ void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
|
|||
}
|
||||
|
||||
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
/**
|
||||
* p4d_set_huge - setup kernel P4D mapping
|
||||
*
|
||||
* No 512GB pages yet -- always return 0
|
||||
*/
|
||||
/* Stub: @p4d, @addr and @prot are all ignored; 0 is returned unconditionally. */
int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* p4d_clear_huge - clear kernel P4D mapping when it is set
|
||||
*
|
||||
* No 512GB pages yet -- always return 0
|
||||
*/
|
||||
/* Stub: @p4d is ignored and never modified; 0 is returned unconditionally. */
int p4d_clear_huge(p4d_t *p4d)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* pud_set_huge - setup kernel PUD mapping
|
||||
*
|
||||
|
|
|
@ -26,6 +26,7 @@ unsigned int __VMALLOC_RESERVE = 128 << 20;
|
|||
void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
@ -35,7 +36,12 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
|
|||
BUG();
|
||||
return;
|
||||
}
|
||||
pud = pud_offset(pgd, vaddr);
|
||||
p4d = p4d_offset(pgd, vaddr);
|
||||
if (p4d_none(*p4d)) {
|
||||
BUG();
|
||||
return;
|
||||
}
|
||||
pud = pud_offset(p4d, vaddr);
|
||||
if (pud_none(*pud)) {
|
||||
BUG();
|
||||
return;
|
||||
|
|
|
@ -263,8 +263,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
|
|||
{
|
||||
struct flush_tlb_info info;
|
||||
|
||||
if (end == 0)
|
||||
end = start + PAGE_SIZE;
|
||||
info.flush_mm = mm;
|
||||
info.flush_start = start;
|
||||
info.flush_end = end;
|
||||
|
@ -289,23 +287,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
|
|||
smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
|
||||
}
|
||||
|
||||
void flush_tlb_current_task(void)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
|
||||
|
||||
/* This is an implicit full barrier that synchronizes with switch_mm. */
|
||||
local_flush_tlb();
|
||||
|
||||
trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
|
||||
if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
|
||||
flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/*
|
||||
* See Documentation/x86/tlb.txt for details. We choose 33
|
||||
* because it is large enough to cover the vast majority (at
|
||||
|
@ -326,6 +307,12 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
|
|||
unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
|
||||
base_pages_to_flush = (end - start) >> PAGE_SHIFT;
|
||||
if (base_pages_to_flush > tlb_single_page_flush_ceiling)
|
||||
base_pages_to_flush = TLB_FLUSH_ALL;
|
||||
|
||||
if (current->active_mm != mm) {
|
||||
/* Synchronize with switch_mm. */
|
||||
smp_mb();
|
||||
|
@ -342,15 +329,11 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
|
|||
goto out;
|
||||
}
|
||||
|
||||
if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
|
||||
base_pages_to_flush = (end - start) >> PAGE_SHIFT;
|
||||
|
||||
/*
|
||||
* Both branches below are implicit full barriers (MOV to CR or
|
||||
* INVLPG) that synchronize with switch_mm.
|
||||
*/
|
||||
if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
|
||||
base_pages_to_flush = TLB_FLUSH_ALL;
|
||||
if (base_pages_to_flush == TLB_FLUSH_ALL) {
|
||||
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
|
||||
local_flush_tlb();
|
||||
} else {
|
||||
|
@ -393,7 +376,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
|
|||
}
|
||||
|
||||
if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
|
||||
flush_tlb_others(mm_cpumask(mm), mm, start, 0UL);
|
||||
flush_tlb_others(mm_cpumask(mm), mm, start, start + PAGE_SIZE);
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
|
|
@ -68,7 +68,7 @@ pgd_t * __init efi_call_phys_prolog(void)
|
|||
load_cr3(initial_page_table);
|
||||
__flush_tlb_all();
|
||||
|
||||
gdt_descr.address = __pa(get_cpu_gdt_table(0));
|
||||
gdt_descr.address = get_cpu_gdt_paddr(0);
|
||||
gdt_descr.size = GDT_SIZE - 1;
|
||||
load_gdt(&gdt_descr);
|
||||
|
||||
|
@ -79,7 +79,7 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
|
|||
{
|
||||
struct desc_ptr gdt_descr;
|
||||
|
||||
gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
|
||||
gdt_descr.address = (unsigned long)get_cpu_gdt_rw(0);
|
||||
gdt_descr.size = GDT_SIZE - 1;
|
||||
load_gdt(&gdt_descr);
|
||||
|
||||
|
|
|
@ -135,6 +135,7 @@ static pgd_t *efi_pgd;
|
|||
int __init efi_alloc_page_tables(void)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
gfp_t gfp_mask;
|
||||
|
||||
|
@ -147,14 +148,19 @@ int __init efi_alloc_page_tables(void)
|
|||
return -ENOMEM;
|
||||
|
||||
pgd = efi_pgd + pgd_index(EFI_VA_END);
|
||||
|
||||
pud = pud_alloc_one(NULL, 0);
|
||||
if (!pud) {
|
||||
p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END);
|
||||
if (!p4d) {
|
||||
free_page((unsigned long)efi_pgd);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
pgd_populate(NULL, pgd, pud);
|
||||
pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
|
||||
if (!pud) {
|
||||
if (CONFIG_PGTABLE_LEVELS > 4)
|
||||
free_page((unsigned long) pgd_page_vaddr(*pgd));
|
||||
free_page((unsigned long)efi_pgd);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -166,6 +172,7 @@ void efi_sync_low_kernel_mappings(void)
|
|||
{
|
||||
unsigned num_entries;
|
||||
pgd_t *pgd_k, *pgd_efi;
|
||||
p4d_t *p4d_k, *p4d_efi;
|
||||
pud_t *pud_k, *pud_efi;
|
||||
|
||||
if (efi_enabled(EFI_OLD_MEMMAP))
|
||||
|
@ -189,6 +196,21 @@ void efi_sync_low_kernel_mappings(void)
|
|||
num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET);
|
||||
memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries);
|
||||
|
||||
/*
|
||||
* As with PGDs, we share all P4D entries apart from the one entry
|
||||
* that covers the EFI runtime mapping space.
|
||||
*/
|
||||
BUILD_BUG_ON(p4d_index(EFI_VA_END) != p4d_index(MODULES_END));
|
||||
BUILD_BUG_ON((EFI_VA_START & P4D_MASK) != (EFI_VA_END & P4D_MASK));
|
||||
|
||||
pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
|
||||
pgd_k = pgd_offset_k(EFI_VA_END);
|
||||
p4d_efi = p4d_offset(pgd_efi, 0);
|
||||
p4d_k = p4d_offset(pgd_k, 0);
|
||||
|
||||
num_entries = p4d_index(EFI_VA_END);
|
||||
memcpy(p4d_efi, p4d_k, sizeof(p4d_t) * num_entries);
|
||||
|
||||
/*
|
||||
* We share all the PUD entries apart from those that map the
|
||||
* EFI regions. Copy around them.
|
||||
|
@ -196,17 +218,16 @@ void efi_sync_low_kernel_mappings(void)
|
|||
BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0);
|
||||
BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0);
|
||||
|
||||
pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
|
||||
pud_efi = pud_offset(pgd_efi, 0);
|
||||
|
||||
pgd_k = pgd_offset_k(EFI_VA_END);
|
||||
pud_k = pud_offset(pgd_k, 0);
|
||||
p4d_efi = p4d_offset(pgd_efi, EFI_VA_END);
|
||||
p4d_k = p4d_offset(pgd_k, EFI_VA_END);
|
||||
pud_efi = pud_offset(p4d_efi, 0);
|
||||
pud_k = pud_offset(p4d_k, 0);
|
||||
|
||||
num_entries = pud_index(EFI_VA_END);
|
||||
memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
|
||||
|
||||
pud_efi = pud_offset(pgd_efi, EFI_VA_START);
|
||||
pud_k = pud_offset(pgd_k, EFI_VA_START);
|
||||
pud_efi = pud_offset(p4d_efi, EFI_VA_START);
|
||||
pud_k = pud_offset(p4d_k, EFI_VA_START);
|
||||
|
||||
num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START);
|
||||
memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
|
||||
|
|
|
@ -95,7 +95,7 @@ static void __save_processor_state(struct saved_context *ctxt)
|
|||
* 'pmode_gdt' in wakeup_start.
|
||||
*/
|
||||
ctxt->gdt_desc.size = GDT_SIZE - 1;
|
||||
ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_table(smp_processor_id());
|
||||
ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_rw(smp_processor_id());
|
||||
|
||||
store_tr(ctxt->tr);
|
||||
|
||||
|
@ -162,7 +162,7 @@ static void fix_processor_context(void)
|
|||
int cpu = smp_processor_id();
|
||||
struct tss_struct *t = &per_cpu(cpu_tss, cpu);
|
||||
#ifdef CONFIG_X86_64
|
||||
struct desc_struct *desc = get_cpu_gdt_table(cpu);
|
||||
struct desc_struct *desc = get_cpu_gdt_rw(cpu);
|
||||
tss_desc tss;
|
||||
#endif
|
||||
set_tss_desc(cpu, t); /*
|
||||
|
@ -183,6 +183,9 @@ static void fix_processor_context(void)
|
|||
load_mm_ldt(current->active_mm); /* This does lldt */
|
||||
|
||||
fpu__resume_cpu();
|
||||
|
||||
/* The processor is back on the direct GDT, load back the fixmap */
|
||||
load_fixmap_gdt(cpu);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -32,6 +32,7 @@ pgd_t *resume_pg_dir;
|
|||
*/
|
||||
static pmd_t *resume_one_md_table_init(pgd_t *pgd)
|
||||
{
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd_table;
|
||||
|
||||
|
@ -41,11 +42,13 @@ static pmd_t *resume_one_md_table_init(pgd_t *pgd)
|
|||
return NULL;
|
||||
|
||||
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
|
||||
pud = pud_offset(pgd, 0);
|
||||
p4d = p4d_offset(pgd, 0);
|
||||
pud = pud_offset(p4d, 0);
|
||||
|
||||
BUG_ON(pmd_table != pmd_offset(pud, 0));
|
||||
#else
|
||||
pud = pud_offset(pgd, 0);
|
||||
p4d = p4d_offset(pgd, 0);
|
||||
pud = pud_offset(p4d, 0);
|
||||
pmd_table = pmd_offset(pud, 0);
|
||||
#endif
|
||||
|
||||
|
|
|
@ -50,6 +50,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
|
|||
{
|
||||
pmd_t *pmd;
|
||||
pud_t *pud;
|
||||
p4d_t *p4d;
|
||||
|
||||
/*
|
||||
* The new mapping only has to cover the page containing the image
|
||||
|
@ -64,6 +65,13 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
|
|||
* the virtual address space after switching over to the original page
|
||||
* tables used by the image kernel.
|
||||
*/
|
||||
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
||||
p4d = (p4d_t *)get_safe_page(GFP_ATOMIC);
|
||||
if (!p4d)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
pud = (pud_t *)get_safe_page(GFP_ATOMIC);
|
||||
if (!pud)
|
||||
return -ENOMEM;
|
||||
|
@ -76,8 +84,13 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
|
|||
__pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
|
||||
set_pud(pud + pud_index(restore_jump_address),
|
||||
__pud(__pa(pmd) | _KERNPG_TABLE));
|
||||
set_pgd(pgd + pgd_index(restore_jump_address),
|
||||
__pgd(__pa(pud) | _KERNPG_TABLE));
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
||||
set_p4d(p4d + p4d_index(restore_jump_address), __p4d(__pa(pud) | _KERNPG_TABLE));
|
||||
set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(p4d) | _KERNPG_TABLE));
|
||||
} else {
|
||||
/* No p4d for 4-level paging: point the pgd to the pud page table */
|
||||
set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(pud) | _KERNPG_TABLE));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -125,7 +138,10 @@ static int set_up_temporary_mappings(void)
|
|||
static int relocate_restore_code(void)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
||||
relocated_restore_code = get_safe_page(GFP_ATOMIC);
|
||||
if (!relocated_restore_code)
|
||||
|
@ -135,22 +151,25 @@ static int relocate_restore_code(void)
|
|||
|
||||
/* Make the page containing the relocated code executable */
|
||||
pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code);
|
||||
pud = pud_offset(pgd, relocated_restore_code);
|
||||
p4d = p4d_offset(pgd, relocated_restore_code);
|
||||
if (p4d_large(*p4d)) {
|
||||
set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX));
|
||||
goto out;
|
||||
}
|
||||
pud = pud_offset(p4d, relocated_restore_code);
|
||||
if (pud_large(*pud)) {
|
||||
set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX));
|
||||
} else {
|
||||
pmd_t *pmd = pmd_offset(pud, relocated_restore_code);
|
||||
|
||||
if (pmd_large(*pmd)) {
|
||||
set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
|
||||
} else {
|
||||
pte_t *pte = pte_offset_kernel(pmd, relocated_restore_code);
|
||||
|
||||
set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
pmd = pmd_offset(pud, relocated_restore_code);
|
||||
if (pmd_large(*pmd)) {
|
||||
set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
|
||||
goto out;
|
||||
}
|
||||
pte = pte_offset_kernel(pmd, relocated_restore_code);
|
||||
set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
|
||||
out:
|
||||
__flush_tlb_all();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -711,7 +711,7 @@ static void load_TLS_descriptor(struct thread_struct *t,
|
|||
|
||||
*shadow = t->tls_array[i];
|
||||
|
||||
gdt = get_cpu_gdt_table(cpu);
|
||||
gdt = get_cpu_gdt_rw(cpu);
|
||||
maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
|
||||
mc = __xen_mc_entry(0);
|
||||
|
||||
|
|
|
@ -535,40 +535,41 @@ static pgd_t *xen_get_user_pgd(pgd_t *pgd)
|
|||
return user_ptr;
|
||||
}
|
||||
|
||||
static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
|
||||
static void __xen_set_p4d_hyper(p4d_t *ptr, p4d_t val)
|
||||
{
|
||||
struct mmu_update u;
|
||||
|
||||
u.ptr = virt_to_machine(ptr).maddr;
|
||||
u.val = pgd_val_ma(val);
|
||||
u.val = p4d_val_ma(val);
|
||||
xen_extend_mmu_update(&u);
|
||||
}
|
||||
|
||||
/*
|
||||
* Raw hypercall-based set_pgd, intended for in early boot before
|
||||
* Raw hypercall-based set_p4d, intended for use in early boot before
|
||||
* there's a page structure. This implies:
|
||||
* 1. The only existing pagetable is the kernel's
|
||||
* 2. It is always pinned
|
||||
* 3. It has no user pagetable attached to it
|
||||
*/
|
||||
static void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
|
||||
static void __init xen_set_p4d_hyper(p4d_t *ptr, p4d_t val)
|
||||
{
|
||||
preempt_disable();
|
||||
|
||||
xen_mc_batch();
|
||||
|
||||
__xen_set_pgd_hyper(ptr, val);
|
||||
__xen_set_p4d_hyper(ptr, val);
|
||||
|
||||
xen_mc_issue(PARAVIRT_LAZY_MMU);
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static void xen_set_pgd(pgd_t *ptr, pgd_t val)
|
||||
static void xen_set_p4d(p4d_t *ptr, p4d_t val)
|
||||
{
|
||||
pgd_t *user_ptr = xen_get_user_pgd(ptr);
|
||||
pgd_t *user_ptr = xen_get_user_pgd((pgd_t *)ptr);
|
||||
pgd_t pgd_val;
|
||||
|
||||
trace_xen_mmu_set_pgd(ptr, user_ptr, val);
|
||||
trace_xen_mmu_set_p4d(ptr, (p4d_t *)user_ptr, val);
|
||||
|
||||
/* If page is not pinned, we can just update the entry
|
||||
directly */
|
||||
|
@ -576,7 +577,8 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val)
|
|||
*ptr = val;
|
||||
if (user_ptr) {
|
||||
WARN_ON(xen_page_pinned(user_ptr));
|
||||
*user_ptr = val;
|
||||
pgd_val.pgd = p4d_val_ma(val);
|
||||
*user_ptr = pgd_val;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -585,14 +587,72 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val)
|
|||
user updates together. */
|
||||
xen_mc_batch();
|
||||
|
||||
__xen_set_pgd_hyper(ptr, val);
|
||||
__xen_set_p4d_hyper(ptr, val);
|
||||
if (user_ptr)
|
||||
__xen_set_pgd_hyper(user_ptr, val);
|
||||
__xen_set_p4d_hyper((p4d_t *)user_ptr, val);
|
||||
|
||||
xen_mc_issue(PARAVIRT_LAZY_MMU);
|
||||
}
|
||||
#endif /* CONFIG_PGTABLE_LEVELS == 4 */
|
||||
|
||||
static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
|
||||
int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
|
||||
bool last, unsigned long limit)
|
||||
{
|
||||
int i, nr, flush = 0;
|
||||
|
||||
nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD;
|
||||
for (i = 0; i < nr; i++) {
|
||||
if (!pmd_none(pmd[i]))
|
||||
flush |= (*func)(mm, pmd_page(pmd[i]), PT_PTE);
|
||||
}
|
||||
return flush;
|
||||
}
|
||||
|
||||
static int xen_pud_walk(struct mm_struct *mm, pud_t *pud,
|
||||
int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
|
||||
bool last, unsigned long limit)
|
||||
{
|
||||
int i, nr, flush = 0;
|
||||
|
||||
nr = last ? pud_index(limit) + 1 : PTRS_PER_PUD;
|
||||
for (i = 0; i < nr; i++) {
|
||||
pmd_t *pmd;
|
||||
|
||||
if (pud_none(pud[i]))
|
||||
continue;
|
||||
|
||||
pmd = pmd_offset(&pud[i], 0);
|
||||
if (PTRS_PER_PMD > 1)
|
||||
flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
|
||||
flush |= xen_pmd_walk(mm, pmd, func,
|
||||
last && i == nr - 1, limit);
|
||||
}
|
||||
return flush;
|
||||
}
|
||||
|
||||
static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
|
||||
int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
|
||||
bool last, unsigned long limit)
|
||||
{
|
||||
int i, nr, flush = 0;
|
||||
|
||||
nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
|
||||
for (i = 0; i < nr; i++) {
|
||||
pud_t *pud;
|
||||
|
||||
if (p4d_none(p4d[i]))
|
||||
continue;
|
||||
|
||||
pud = pud_offset(&p4d[i], 0);
|
||||
if (PTRS_PER_PUD > 1)
|
||||
flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
|
||||
flush |= xen_pud_walk(mm, pud, func,
|
||||
last && i == nr - 1, limit);
|
||||
}
|
||||
return flush;
|
||||
}
|
||||
|
||||
/*
|
||||
* (Yet another) pagetable walker. This one is intended for pinning a
|
||||
* pagetable. This means that it walks a pagetable and calls the
|
||||
|
@ -613,10 +673,8 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
|
|||
enum pt_level),
|
||||
unsigned long limit)
|
||||
{
|
||||
int flush = 0;
|
||||
int i, nr, flush = 0;
|
||||
unsigned hole_low, hole_high;
|
||||
unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
|
||||
unsigned pgdidx, pudidx, pmdidx;
|
||||
|
||||
/* The limit is the last byte to be touched */
|
||||
limit--;
|
||||
|
@ -633,65 +691,22 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
|
|||
hole_low = pgd_index(USER_LIMIT);
|
||||
hole_high = pgd_index(PAGE_OFFSET);
|
||||
|
||||
pgdidx_limit = pgd_index(limit);
|
||||
#if PTRS_PER_PUD > 1
|
||||
pudidx_limit = pud_index(limit);
|
||||
#else
|
||||
pudidx_limit = 0;
|
||||
#endif
|
||||
#if PTRS_PER_PMD > 1
|
||||
pmdidx_limit = pmd_index(limit);
|
||||
#else
|
||||
pmdidx_limit = 0;
|
||||
#endif
|
||||
nr = pgd_index(limit) + 1;
|
||||
for (i = 0; i < nr; i++) {
|
||||
p4d_t *p4d;
|
||||
|
||||
for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
|
||||
pud_t *pud;
|
||||
|
||||
if (pgdidx >= hole_low && pgdidx < hole_high)
|
||||
if (i >= hole_low && i < hole_high)
|
||||
continue;
|
||||
|
||||
if (!pgd_val(pgd[pgdidx]))
|
||||
if (pgd_none(pgd[i]))
|
||||
continue;
|
||||
|
||||
pud = pud_offset(&pgd[pgdidx], 0);
|
||||
|
||||
if (PTRS_PER_PUD > 1) /* not folded */
|
||||
flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
|
||||
|
||||
for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
|
||||
pmd_t *pmd;
|
||||
|
||||
if (pgdidx == pgdidx_limit &&
|
||||
pudidx > pudidx_limit)
|
||||
goto out;
|
||||
|
||||
if (pud_none(pud[pudidx]))
|
||||
continue;
|
||||
|
||||
pmd = pmd_offset(&pud[pudidx], 0);
|
||||
|
||||
if (PTRS_PER_PMD > 1) /* not folded */
|
||||
flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
|
||||
|
||||
for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
|
||||
struct page *pte;
|
||||
|
||||
if (pgdidx == pgdidx_limit &&
|
||||
pudidx == pudidx_limit &&
|
||||
pmdidx > pmdidx_limit)
|
||||
goto out;
|
||||
|
||||
if (pmd_none(pmd[pmdidx]))
|
||||
continue;
|
||||
|
||||
pte = pmd_page(pmd[pmdidx]);
|
||||
flush |= (*func)(mm, pte, PT_PTE);
|
||||
}
|
||||
}
|
||||
p4d = p4d_offset(&pgd[i], 0);
|
||||
if (PTRS_PER_P4D > 1)
|
||||
flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
|
||||
flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
|
||||
}
|
||||
|
||||
out:
|
||||
/* Do the top level last, so that the callbacks can use it as
|
||||
a cue to do final things like tlb flushes. */
|
||||
flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
|
||||
|
@ -1150,57 +1165,97 @@ static void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin)
|
|||
xen_free_ro_pages(pa, PAGE_SIZE);
|
||||
}
|
||||
|
||||
/*
 * Release everything reachable from a single PMD entry.
 *
 * A large entry has its PMD_SIZE range passed to xen_free_ro_pages() and
 * is left in place. Otherwise every populated PTE's page is passed to
 * xen_free_ro_pages(), the PMD entry is cleared, and the PTE table itself
 * is handed to xen_cleanmfnmap_free_pgtbl() together with @unpin.
 */
static void __init xen_cleanmfnmap_pmd(pmd_t *pmd, bool unpin)
{
	unsigned long phys;
	pte_t *ptes;
	int slot;

	if (pmd_large(*pmd)) {
		phys = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
		xen_free_ro_pages(phys, PMD_SIZE);
		return;
	}

	ptes = pte_offset_kernel(pmd, 0);
	for (slot = 0; slot < PTRS_PER_PTE; slot++) {
		if (!pte_none(ptes[slot])) {
			phys = pte_pfn(ptes[slot]) << PAGE_SHIFT;
			xen_free_ro_pages(phys, PAGE_SIZE);
		}
	}

	/* Clear the entry before giving up the table it pointed to. */
	set_pmd(pmd, __pmd(0));
	xen_cleanmfnmap_free_pgtbl(ptes, unpin);
}
|
||||
|
||||
/*
 * Release everything reachable from a single PUD entry.
 *
 * A large entry has its PUD_SIZE range passed to xen_free_ro_pages() and
 * is left in place. Otherwise each populated PMD entry is cleaned with
 * xen_cleanmfnmap_pmd(), the PUD entry is cleared, and the PMD table is
 * handed to xen_cleanmfnmap_free_pgtbl() together with @unpin.
 */
static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin)
{
	unsigned long phys;
	pmd_t *pmds;
	int slot;

	if (pud_large(*pud)) {
		phys = pud_val(*pud) & PHYSICAL_PAGE_MASK;
		xen_free_ro_pages(phys, PUD_SIZE);
		return;
	}

	pmds = pmd_offset(pud, 0);
	for (slot = 0; slot < PTRS_PER_PMD; slot++) {
		if (!pmd_none(pmds[slot]))
			xen_cleanmfnmap_pmd(&pmds[slot], unpin);
	}

	/* Clear the entry before giving up the table it pointed to. */
	set_pud(pud, __pud(0));
	xen_cleanmfnmap_free_pgtbl(pmds, unpin);
}
|
||||
|
||||
/*
 * Release everything reachable from a single P4D entry.
 *
 * A large entry has its P4D_SIZE range passed to xen_free_ro_pages() and
 * is left in place. Otherwise each populated PUD entry is cleaned with
 * xen_cleanmfnmap_pud(), the P4D entry is cleared, and the PUD table is
 * handed to xen_cleanmfnmap_free_pgtbl() together with @unpin.
 */
static void __init xen_cleanmfnmap_p4d(p4d_t *p4d, bool unpin)
{
	unsigned long phys;
	pud_t *puds;
	int slot;

	if (p4d_large(*p4d)) {
		phys = p4d_val(*p4d) & PHYSICAL_PAGE_MASK;
		xen_free_ro_pages(phys, P4D_SIZE);
		return;
	}

	puds = pud_offset(p4d, 0);
	for (slot = 0; slot < PTRS_PER_PUD; slot++) {
		if (!pud_none(puds[slot]))
			xen_cleanmfnmap_pud(&puds[slot], unpin);
	}

	/* Clear the entry before giving up the table it pointed to. */
	set_p4d(p4d, __p4d(0));
	xen_cleanmfnmap_free_pgtbl(puds, unpin);
}
|
||||
|
||||
/*
|
||||
* Since it is well isolated we can (and since it is perhaps large we should)
|
||||
* also free the page tables mapping the initial P->M table.
|
||||
*/
|
||||
static void __init xen_cleanmfnmap(unsigned long vaddr)
|
||||
{
|
||||
unsigned long va = vaddr & PMD_MASK;
|
||||
unsigned long pa;
|
||||
pgd_t *pgd = pgd_offset_k(va);
|
||||
pud_t *pud_page = pud_offset(pgd, 0);
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
unsigned int i;
|
||||
bool unpin;
|
||||
|
||||
unpin = (vaddr == 2 * PGDIR_SIZE);
|
||||
set_pgd(pgd, __pgd(0));
|
||||
do {
|
||||
pud = pud_page + pud_index(va);
|
||||
if (pud_none(*pud)) {
|
||||
va += PUD_SIZE;
|
||||
} else if (pud_large(*pud)) {
|
||||
pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
|
||||
xen_free_ro_pages(pa, PUD_SIZE);
|
||||
va += PUD_SIZE;
|
||||
} else {
|
||||
pmd = pmd_offset(pud, va);
|
||||
if (pmd_large(*pmd)) {
|
||||
pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
|
||||
xen_free_ro_pages(pa, PMD_SIZE);
|
||||
} else if (!pmd_none(*pmd)) {
|
||||
pte = pte_offset_kernel(pmd, va);
|
||||
set_pmd(pmd, __pmd(0));
|
||||
for (i = 0; i < PTRS_PER_PTE; ++i) {
|
||||
if (pte_none(pte[i]))
|
||||
break;
|
||||
pa = pte_pfn(pte[i]) << PAGE_SHIFT;
|
||||
xen_free_ro_pages(pa, PAGE_SIZE);
|
||||
}
|
||||
xen_cleanmfnmap_free_pgtbl(pte, unpin);
|
||||
}
|
||||
va += PMD_SIZE;
|
||||
if (pmd_index(va))
|
||||
continue;
|
||||
set_pud(pud, __pud(0));
|
||||
xen_cleanmfnmap_free_pgtbl(pmd, unpin);
|
||||
}
|
||||
|
||||
} while (pud_index(va) || pmd_index(va));
|
||||
xen_cleanmfnmap_free_pgtbl(pud_page, unpin);
|
||||
vaddr &= PMD_MASK;
|
||||
pgd = pgd_offset_k(vaddr);
|
||||
p4d = p4d_offset(pgd, 0);
|
||||
for (i = 0; i < PTRS_PER_P4D; i++) {
|
||||
if (p4d_none(p4d[i]))
|
||||
continue;
|
||||
xen_cleanmfnmap_p4d(p4d + i, unpin);
|
||||
}
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
||||
set_pgd(pgd, __pgd(0));
|
||||
xen_cleanmfnmap_free_pgtbl(p4d, unpin);
|
||||
}
|
||||
}
|
||||
|
||||
static void __init xen_pagetable_p2m_free(void)
|
||||
|
@ -1538,7 +1593,6 @@ static int xen_pgd_alloc(struct mm_struct *mm)
|
|||
BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
|
||||
}
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1730,7 +1784,7 @@ static void xen_release_pmd(unsigned long pfn)
|
|||
xen_release_ptpage(pfn, PT_PMD);
|
||||
}
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS == 4
|
||||
#if CONFIG_PGTABLE_LEVELS >= 4
|
||||
static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
|
||||
{
|
||||
xen_alloc_ptpage(mm, pfn, PT_PUD);
|
||||
|
@ -2071,21 +2125,27 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
|
|||
*/
|
||||
void __init xen_relocate_p2m(void)
|
||||
{
|
||||
phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
|
||||
phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys;
|
||||
unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
|
||||
int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
|
||||
int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d;
|
||||
pte_t *pt;
|
||||
pmd_t *pmd;
|
||||
pud_t *pud;
|
||||
p4d_t *p4d = NULL;
|
||||
pgd_t *pgd;
|
||||
unsigned long *new_p2m;
|
||||
int save_pud;
|
||||
|
||||
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
|
||||
n_pte = roundup(size, PAGE_SIZE) >> PAGE_SHIFT;
|
||||
n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
|
||||
n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
|
||||
n_pud = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
|
||||
n_frames = n_pte + n_pt + n_pmd + n_pud;
|
||||
n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
|
||||
if (PTRS_PER_P4D > 1)
|
||||
n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
|
||||
else
|
||||
n_p4d = 0;
|
||||
n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d;
|
||||
|
||||
new_area = xen_find_free_area(PFN_PHYS(n_frames));
|
||||
if (!new_area) {
|
||||
|
@ -2101,55 +2161,76 @@ void __init xen_relocate_p2m(void)
|
|||
* To avoid any possible virtual address collision, just use
|
||||
* 2 * PGDIR_SIZE for the new area.
|
||||
*/
|
||||
pud_phys = new_area;
|
||||
p4d_phys = new_area;
|
||||
pud_phys = p4d_phys + PFN_PHYS(n_p4d);
|
||||
pmd_phys = pud_phys + PFN_PHYS(n_pud);
|
||||
pt_phys = pmd_phys + PFN_PHYS(n_pmd);
|
||||
p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
|
||||
|
||||
pgd = __va(read_cr3());
|
||||
new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
|
||||
for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
|
||||
pud = early_memremap(pud_phys, PAGE_SIZE);
|
||||
clear_page(pud);
|
||||
for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
|
||||
idx_pmd++) {
|
||||
pmd = early_memremap(pmd_phys, PAGE_SIZE);
|
||||
clear_page(pmd);
|
||||
for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
|
||||
idx_pt++) {
|
||||
pt = early_memremap(pt_phys, PAGE_SIZE);
|
||||
clear_page(pt);
|
||||
for (idx_pte = 0;
|
||||
idx_pte < min(n_pte, PTRS_PER_PTE);
|
||||
idx_pte++) {
|
||||
set_pte(pt + idx_pte,
|
||||
pfn_pte(p2m_pfn, PAGE_KERNEL));
|
||||
p2m_pfn++;
|
||||
}
|
||||
n_pte -= PTRS_PER_PTE;
|
||||
early_memunmap(pt, PAGE_SIZE);
|
||||
make_lowmem_page_readonly(__va(pt_phys));
|
||||
pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
|
||||
PFN_DOWN(pt_phys));
|
||||
set_pmd(pmd + idx_pt,
|
||||
__pmd(_PAGE_TABLE | pt_phys));
|
||||
pt_phys += PAGE_SIZE;
|
||||
}
|
||||
n_pt -= PTRS_PER_PMD;
|
||||
early_memunmap(pmd, PAGE_SIZE);
|
||||
make_lowmem_page_readonly(__va(pmd_phys));
|
||||
pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
|
||||
PFN_DOWN(pmd_phys));
|
||||
set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
|
||||
pmd_phys += PAGE_SIZE;
|
||||
idx_p4d = 0;
|
||||
save_pud = n_pud;
|
||||
do {
|
||||
if (n_p4d > 0) {
|
||||
p4d = early_memremap(p4d_phys, PAGE_SIZE);
|
||||
clear_page(p4d);
|
||||
n_pud = min(save_pud, PTRS_PER_P4D);
|
||||
}
|
||||
n_pmd -= PTRS_PER_PUD;
|
||||
early_memunmap(pud, PAGE_SIZE);
|
||||
make_lowmem_page_readonly(__va(pud_phys));
|
||||
pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
|
||||
set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
|
||||
pud_phys += PAGE_SIZE;
|
||||
}
|
||||
for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
|
||||
pud = early_memremap(pud_phys, PAGE_SIZE);
|
||||
clear_page(pud);
|
||||
for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
|
||||
idx_pmd++) {
|
||||
pmd = early_memremap(pmd_phys, PAGE_SIZE);
|
||||
clear_page(pmd);
|
||||
for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
|
||||
idx_pt++) {
|
||||
pt = early_memremap(pt_phys, PAGE_SIZE);
|
||||
clear_page(pt);
|
||||
for (idx_pte = 0;
|
||||
idx_pte < min(n_pte, PTRS_PER_PTE);
|
||||
idx_pte++) {
|
||||
set_pte(pt + idx_pte,
|
||||
pfn_pte(p2m_pfn, PAGE_KERNEL));
|
||||
p2m_pfn++;
|
||||
}
|
||||
n_pte -= PTRS_PER_PTE;
|
||||
early_memunmap(pt, PAGE_SIZE);
|
||||
make_lowmem_page_readonly(__va(pt_phys));
|
||||
pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
|
||||
PFN_DOWN(pt_phys));
|
||||
set_pmd(pmd + idx_pt,
|
||||
__pmd(_PAGE_TABLE | pt_phys));
|
||||
pt_phys += PAGE_SIZE;
|
||||
}
|
||||
n_pt -= PTRS_PER_PMD;
|
||||
early_memunmap(pmd, PAGE_SIZE);
|
||||
make_lowmem_page_readonly(__va(pmd_phys));
|
||||
pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
|
||||
PFN_DOWN(pmd_phys));
|
||||
set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
|
||||
pmd_phys += PAGE_SIZE;
|
||||
}
|
||||
n_pmd -= PTRS_PER_PUD;
|
||||
early_memunmap(pud, PAGE_SIZE);
|
||||
make_lowmem_page_readonly(__va(pud_phys));
|
||||
pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
|
||||
if (n_p4d > 0)
|
||||
set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys));
|
||||
else
|
||||
set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
|
||||
pud_phys += PAGE_SIZE;
|
||||
}
|
||||
if (n_p4d > 0) {
|
||||
save_pud -= PTRS_PER_P4D;
|
||||
early_memunmap(p4d, PAGE_SIZE);
|
||||
make_lowmem_page_readonly(__va(p4d_phys));
|
||||
pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys));
|
||||
set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys));
|
||||
p4d_phys += PAGE_SIZE;
|
||||
}
|
||||
} while (++idx_p4d < n_p4d);
|
||||
|
||||
/* Now copy the old p2m info to the new area. */
|
||||
memcpy(new_p2m, xen_p2m_addr, size);
|
||||
|
@ -2326,6 +2407,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
|
|||
#endif
|
||||
case FIX_TEXT_POKE0:
|
||||
case FIX_TEXT_POKE1:
|
||||
case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
|
||||
/* All local page mappings */
|
||||
pte = pfn_pte(phys, prot);
|
||||
break;
|
||||
|
@ -2378,8 +2460,8 @@ static void __init xen_post_allocator_init(void)
|
|||
pv_mmu_ops.set_pte = xen_set_pte;
|
||||
pv_mmu_ops.set_pmd = xen_set_pmd;
|
||||
pv_mmu_ops.set_pud = xen_set_pud;
|
||||
#if CONFIG_PGTABLE_LEVELS == 4
|
||||
pv_mmu_ops.set_pgd = xen_set_pgd;
|
||||
#if CONFIG_PGTABLE_LEVELS >= 4
|
||||
pv_mmu_ops.set_p4d = xen_set_p4d;
|
||||
#endif
|
||||
|
||||
/* This will work as long as patching hasn't happened yet
|
||||
|
@ -2388,7 +2470,7 @@ static void __init xen_post_allocator_init(void)
|
|||
pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
|
||||
pv_mmu_ops.release_pte = xen_release_pte;
|
||||
pv_mmu_ops.release_pmd = xen_release_pmd;
|
||||
#if CONFIG_PGTABLE_LEVELS == 4
|
||||
#if CONFIG_PGTABLE_LEVELS >= 4
|
||||
pv_mmu_ops.alloc_pud = xen_alloc_pud;
|
||||
pv_mmu_ops.release_pud = xen_release_pud;
|
||||
#endif
|
||||
|
@ -2454,10 +2536,10 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
|
|||
.make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
|
||||
.pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS == 4
|
||||
#if CONFIG_PGTABLE_LEVELS >= 4
|
||||
.pud_val = PV_CALLEE_SAVE(xen_pud_val),
|
||||
.make_pud = PV_CALLEE_SAVE(xen_make_pud),
|
||||
.set_pgd = xen_set_pgd_hyper,
|
||||
.set_p4d = xen_set_p4d_hyper,
|
||||
|
||||
.alloc_pud = xen_alloc_pmd_init,
|
||||
.release_pud = xen_release_pmd_init,
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
enum pt_level {
|
||||
PT_PGD,
|
||||
PT_P4D,
|
||||
PT_PUD,
|
||||
PT_PMD,
|
||||
PT_PTE
|
||||
|
|
|
@ -392,7 +392,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
|
|||
if (ctxt == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
gdt = get_cpu_gdt_table(cpu);
|
||||
gdt = get_cpu_gdt_rw(cpu);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
ctxt->user_regs.fs = __KERNEL_PERCPU;
|
||||
|
|
|
@ -43,6 +43,7 @@ static void dax_pmem_percpu_exit(void *data)
|
|||
struct dax_pmem *dax_pmem = to_dax_pmem(ref);
|
||||
|
||||
dev_dbg(dax_pmem->dev, "%s\n", __func__);
|
||||
wait_for_completion(&dax_pmem->cmp);
|
||||
percpu_ref_exit(ref);
|
||||
}
|
||||
|
||||
|
@ -53,7 +54,6 @@ static void dax_pmem_percpu_kill(void *data)
|
|||
|
||||
dev_dbg(dax_pmem->dev, "%s\n", __func__);
|
||||
percpu_ref_kill(ref);
|
||||
wait_for_completion(&dax_pmem->cmp);
|
||||
}
|
||||
|
||||
static int dax_pmem_probe(struct device *dev)
|
||||
|
|
|
@ -504,7 +504,7 @@ void __init lguest_arch_host_init(void)
|
|||
* byte, not the size, hence the "-1").
|
||||
*/
|
||||
state->host_gdt_desc.size = GDT_SIZE-1;
|
||||
state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
|
||||
state->host_gdt_desc.address = (long)get_cpu_gdt_rw(i);
|
||||
|
||||
/*
|
||||
* All CPUs on the Host use the same Interrupt Descriptor
|
||||
|
@ -554,8 +554,8 @@ void __init lguest_arch_host_init(void)
|
|||
* The Host needs to be able to use the LGUEST segments on this
|
||||
* CPU, too, so put them in the Host GDT.
|
||||
*/
|
||||
get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
|
||||
get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
|
||||
get_cpu_gdt_rw(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
|
||||
get_cpu_gdt_rw(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include <linux/badblocks.h>
|
||||
#include <linux/memremap.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/pfn_t.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/pmem.h>
|
||||
|
@ -231,6 +232,11 @@ static void pmem_release_queue(void *q)
|
|||
blk_cleanup_queue(q);
|
||||
}
|
||||
|
||||
/*
 * Callback registered through devm_add_action_or_reset(): begins freezing
 * the request queue passed as @q by calling blk_freeze_queue_start().
 */
static void pmem_freeze_queue(void *q)
{
	struct request_queue *queue = q;

	blk_freeze_queue_start(queue);
}
|
||||
|
||||
static void pmem_release_disk(void *disk)
|
||||
{
|
||||
del_gendisk(disk);
|
||||
|
@ -284,6 +290,9 @@ static int pmem_attach_disk(struct device *dev,
|
|||
if (!q)
|
||||
return -ENOMEM;
|
||||
|
||||
if (devm_add_action_or_reset(dev, pmem_release_queue, q))
|
||||
return -ENOMEM;
|
||||
|
||||
pmem->pfn_flags = PFN_DEV;
|
||||
if (is_nd_pfn(dev)) {
|
||||
addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter,
|
||||
|
@ -303,10 +312,10 @@ static int pmem_attach_disk(struct device *dev,
|
|||
pmem->size, ARCH_MEMREMAP_PMEM);
|
||||
|
||||
/*
|
||||
* At release time the queue must be dead before
|
||||
* At release time the queue must be frozen before
|
||||
* devm_memremap_pages is unwound
|
||||
*/
|
||||
if (devm_add_action_or_reset(dev, pmem_release_queue, q))
|
||||
if (devm_add_action_or_reset(dev, pmem_freeze_queue, q))
|
||||
return -ENOMEM;
|
||||
|
||||
if (IS_ERR(addr))
|
||||
|
|
|
@ -54,7 +54,7 @@ __asm__(".text \n"
|
|||
|
||||
#define Q2_SET_SEL(cpu, selname, address, size) \
|
||||
do { \
|
||||
struct desc_struct *gdt = get_cpu_gdt_table((cpu)); \
|
||||
struct desc_struct *gdt = get_cpu_gdt_rw((cpu)); \
|
||||
set_desc_base(&gdt[(selname) >> 3], (u32)(address)); \
|
||||
set_desc_limit(&gdt[(selname) >> 3], (size) - 1); \
|
||||
} while(0)
|
||||
|
@ -95,8 +95,8 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
|
|||
return PNP_FUNCTION_NOT_SUPPORTED;
|
||||
|
||||
cpu = get_cpu();
|
||||
save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8];
|
||||
get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;
|
||||
save_desc_40 = get_cpu_gdt_rw(cpu)[0x40 / 8];
|
||||
get_cpu_gdt_rw(cpu)[0x40 / 8] = bad_bios_desc;
|
||||
|
||||
/* On some boxes IRQ's during PnP BIOS calls are deadly. */
|
||||
spin_lock_irqsave(&pnp_bios_lock, flags);
|
||||
|
@ -134,7 +134,7 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
|
|||
:"memory");
|
||||
spin_unlock_irqrestore(&pnp_bios_lock, flags);
|
||||
|
||||
get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
|
||||
get_cpu_gdt_rw(cpu)[0x40 / 8] = save_desc_40;
|
||||
put_cpu();
|
||||
|
||||
/* If we get here and this is set then the PnP BIOS faulted on us. */
|
||||
|
@ -477,7 +477,7 @@ void pnpbios_calls_init(union pnp_bios_install_struct *header)
|
|||
pnp_bios_callpoint.segment = PNP_CS16;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
struct desc_struct *gdt = get_cpu_gdt_table(i);
|
||||
struct desc_struct *gdt = get_cpu_gdt_rw(i);
|
||||
if (!gdt)
|
||||
continue;
|
||||
set_desc_base(&gdt[GDT_ENTRY_PNPBIOS_CS32],
|
||||
|
|
|
@ -32,10 +32,4 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
|
|||
/* by default, allow everything */
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool arch_pte_access_permitted(pte_t pte, bool write)
|
||||
{
|
||||
/* by default, allow everything */
|
||||
return true;
|
||||
}
|
||||
#endif /* _ASM_GENERIC_MM_HOOKS_H */
|
||||
|
|
|
@ -341,6 +341,31 @@ static inline int pte_unused(pte_t pte)
|
|||
}
|
||||
#endif
|
||||
|
||||
/*
 * Default <level>_access_permitted() checks, one per page-table level.
 * Each evaluates to true when the entry is present and, if @write is
 * non-zero, the entry is also writable. Every definition is guarded by
 * #ifndef, so a definition made earlier takes precedence over the
 * default here. Note that the entry argument can be expanded more than
 * once.
 */
#ifndef pte_access_permitted
|
||||
#define pte_access_permitted(pte, write) \
|
||||
(pte_present(pte) && (!(write) || pte_write(pte)))
|
||||
#endif
|
||||
|
||||
#ifndef pmd_access_permitted
|
||||
#define pmd_access_permitted(pmd, write) \
|
||||
(pmd_present(pmd) && (!(write) || pmd_write(pmd)))
|
||||
#endif
|
||||
|
||||
#ifndef pud_access_permitted
|
||||
#define pud_access_permitted(pud, write) \
|
||||
(pud_present(pud) && (!(write) || pud_write(pud)))
|
||||
#endif
|
||||
|
||||
#ifndef p4d_access_permitted
|
||||
#define p4d_access_permitted(p4d, write) \
|
||||
(p4d_present(p4d) && (!(write) || p4d_write(p4d)))
|
||||
#endif
|
||||
|
||||
#ifndef pgd_access_permitted
|
||||
#define pgd_access_permitted(pgd, write) \
|
||||
(pgd_present(pgd) && (!(write) || pgd_write(pgd)))
|
||||
#endif
|
||||
|
||||
#ifndef __HAVE_ARCH_PMD_SAME
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
|
||||
|
|
|
@ -432,6 +432,10 @@ static inline int pud_devmap(pud_t pud)
|
|||
{
|
||||
return 0;
|
||||
}
|
||||
/* Stub variant: returns 0 for every @pgd, whatever its contents. */
static inline int pgd_devmap(pgd_t pgd)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -758,19 +762,11 @@ static inline enum zone_type page_zonenum(const struct page *page)
|
|||
}
|
||||
|
||||
#ifdef CONFIG_ZONE_DEVICE
|
||||
void get_zone_device_page(struct page *page);
|
||||
void put_zone_device_page(struct page *page);
|
||||
static inline bool is_zone_device_page(const struct page *page)
|
||||
{
|
||||
return page_zonenum(page) == ZONE_DEVICE;
|
||||
}
|
||||
#else
|
||||
static inline void get_zone_device_page(struct page *page)
|
||||
{
|
||||
}
|
||||
static inline void put_zone_device_page(struct page *page)
|
||||
{
|
||||
}
|
||||
static inline bool is_zone_device_page(const struct page *page)
|
||||
{
|
||||
return false;
|
||||
|
@ -786,9 +782,6 @@ static inline void get_page(struct page *page)
|
|||
*/
|
||||
VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page);
|
||||
page_ref_inc(page);
|
||||
|
||||
if (unlikely(is_zone_device_page(page)))
|
||||
get_zone_device_page(page);
|
||||
}
|
||||
|
||||
static inline void put_page(struct page *page)
|
||||
|
@ -797,9 +790,6 @@ static inline void put_page(struct page *page)
|
|||
|
||||
if (put_page_testzero(page))
|
||||
__put_page(page);
|
||||
|
||||
if (unlikely(is_zone_device_page(page)))
|
||||
put_zone_device_page(page);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
|
||||
|
|
|
@ -367,6 +367,11 @@ struct mm_struct {
|
|||
#endif
|
||||
unsigned long mmap_base; /* base of mmap area */
|
||||
unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */
|
||||
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
|
||||
/* Base addresses for compatible mmap() */
|
||||
unsigned long mmap_compat_base;
|
||||
unsigned long mmap_compat_legacy_base;
|
||||
#endif
|
||||
unsigned long task_size; /* size of task vm space */
|
||||
unsigned long highest_vm_end; /* highest vma end address */
|
||||
pgd_t * pgd;
|
||||
|
|
|
@ -148,7 +148,7 @@ static inline int page_cache_get_speculative(struct page *page)
|
|||
|
||||
#ifdef CONFIG_TINY_RCU
|
||||
# ifdef CONFIG_PREEMPT_COUNT
|
||||
VM_BUG_ON(!in_atomic());
|
||||
VM_BUG_ON(!in_atomic() && !irqs_disabled());
|
||||
# endif
|
||||
/*
|
||||
* Preempt must be disabled here - we rely on rcu_read_lock doing
|
||||
|
@ -186,7 +186,7 @@ static inline int page_cache_add_speculative(struct page *page, int count)
|
|||
|
||||
#if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU)
|
||||
# ifdef CONFIG_PREEMPT_COUNT
|
||||
VM_BUG_ON(!in_atomic());
|
||||
VM_BUG_ON(!in_atomic() && !irqs_disabled());
|
||||
# endif
|
||||
VM_BUG_ON_PAGE(page_count(page) == 0, page);
|
||||
page_ref_add(page, count);
|
||||
|
|
|
@ -241,21 +241,21 @@ TRACE_EVENT(xen_mmu_set_pud,
|
|||
(int)sizeof(pudval_t) * 2, (unsigned long long)__entry->pudval)
|
||||
);
|
||||
|
||||
TRACE_EVENT(xen_mmu_set_pgd,
|
||||
TP_PROTO(pgd_t *pgdp, pgd_t *user_pgdp, pgd_t pgdval),
|
||||
TP_ARGS(pgdp, user_pgdp, pgdval),
|
||||
TRACE_EVENT(xen_mmu_set_p4d,
|
||||
TP_PROTO(p4d_t *p4dp, p4d_t *user_p4dp, p4d_t p4dval),
|
||||
TP_ARGS(p4dp, user_p4dp, p4dval),
|
||||
TP_STRUCT__entry(
|
||||
__field(pgd_t *, pgdp)
|
||||
__field(pgd_t *, user_pgdp)
|
||||
__field(pgdval_t, pgdval)
|
||||
__field(p4d_t *, p4dp)
|
||||
__field(p4d_t *, user_p4dp)
|
||||
__field(p4dval_t, p4dval)
|
||||
),
|
||||
TP_fast_assign(__entry->pgdp = pgdp;
|
||||
__entry->user_pgdp = user_pgdp;
|
||||
__entry->pgdval = pgdval.pgd),
|
||||
TP_printk("pgdp %p user_pgdp %p pgdval %0*llx (raw %0*llx)",
|
||||
__entry->pgdp, __entry->user_pgdp,
|
||||
(int)sizeof(pgdval_t) * 2, (unsigned long long)pgd_val(native_make_pgd(__entry->pgdval)),
|
||||
(int)sizeof(pgdval_t) * 2, (unsigned long long)__entry->pgdval)
|
||||
TP_fast_assign(__entry->p4dp = p4dp;
|
||||
__entry->user_p4dp = user_p4dp;
|
||||
__entry->p4dval = p4d_val(p4dval)),
|
||||
TP_printk("p4dp %p user_p4dp %p p4dval %0*llx (raw %0*llx)",
|
||||
__entry->p4dp, __entry->user_p4dp,
|
||||
(int)sizeof(p4dval_t) * 2, (unsigned long long)pgd_val(native_make_pgd(__entry->p4dval)),
|
||||
(int)sizeof(p4dval_t) * 2, (unsigned long long)__entry->p4dval)
|
||||
);
|
||||
|
||||
TRACE_EVENT(xen_mmu_pud_clear,
|
||||
|
|
|
@ -182,18 +182,6 @@ struct page_map {
|
|||
struct vmem_altmap altmap;
|
||||
};
|
||||
|
||||
void get_zone_device_page(struct page *page)
|
||||
{
|
||||
percpu_ref_get(page->pgmap->ref);
|
||||
}
|
||||
EXPORT_SYMBOL(get_zone_device_page);
|
||||
|
||||
void put_zone_device_page(struct page *page)
|
||||
{
|
||||
put_dev_pagemap(page->pgmap);
|
||||
}
|
||||
EXPORT_SYMBOL(put_zone_device_page);
|
||||
|
||||
static void pgmap_radix_release(struct resource *res)
|
||||
{
|
||||
resource_size_t key, align_start, align_size, align_end;
|
||||
|
@ -237,6 +225,10 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
|
|||
struct resource *res = &page_map->res;
|
||||
resource_size_t align_start, align_size;
|
||||
struct dev_pagemap *pgmap = &page_map->pgmap;
|
||||
unsigned long pfn;
|
||||
|
||||
for_each_device_pfn(pfn, page_map)
|
||||
put_page(pfn_to_page(pfn));
|
||||
|
||||
if (percpu_ref_tryget_live(pgmap->ref)) {
|
||||
dev_WARN(dev, "%s: page mapping is still live!\n", __func__);
|
||||
|
@ -277,7 +269,10 @@ struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
|
|||
*
|
||||
* Notes:
|
||||
* 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time
|
||||
* (or devm release event).
|
||||
* (or devm release event). The expected order of events is that @ref has
|
||||
* been through percpu_ref_kill() before devm_memremap_pages_release(). The
|
||||
* wait for the completion of all references being dropped and
|
||||
* percpu_ref_exit() must occur after devm_memremap_pages_release().
|
||||
*
|
||||
* 2/ @res is expected to be a host memory range that could feasibly be
|
||||
* treated as a "System RAM" range, i.e. not a device mmio range, but
|
||||
|
@ -379,6 +374,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
|
|||
*/
|
||||
list_del(&page->lru);
|
||||
page->pgmap = pgmap;
|
||||
percpu_ref_get(ref);
|
||||
}
|
||||
devres_add(dev, page_map);
|
||||
return __va(res->start);
|
||||
|
|
148
mm/gup.c
148
mm/gup.c
|
@ -1189,34 +1189,57 @@ struct page *get_dump_page(unsigned long addr)
|
|||
*/
|
||||
#ifdef CONFIG_HAVE_GENERIC_RCU_GUP
|
||||
|
||||
#ifndef gup_get_pte
|
||||
/*
|
||||
* We assume that the PTE can be read atomically. If this is not the case for
|
||||
* your architecture, please provide the helper.
|
||||
*/
|
||||
static inline pte_t gup_get_pte(pte_t *ptep)
|
||||
{
|
||||
return READ_ONCE(*ptep);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Roll back the entries recorded in @pages from index *@nr - 1 down to
 * @nr_start: each page has its referenced flag cleared and its reference
 * dropped, and *@nr is wound back to @nr_start.
 */
static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
{
	while (*nr != nr_start) {
		struct page *victim;

		(*nr)--;
		victim = pages[*nr];

		ClearPageReferenced(victim);
		put_page(victim);
	}
}
|
||||
|
||||
#ifdef __HAVE_ARCH_PTE_SPECIAL
|
||||
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
|
||||
int write, struct page **pages, int *nr)
|
||||
{
|
||||
struct dev_pagemap *pgmap = NULL;
|
||||
int nr_start = *nr, ret = 0;
|
||||
pte_t *ptep, *ptem;
|
||||
int ret = 0;
|
||||
|
||||
ptem = ptep = pte_offset_map(&pmd, addr);
|
||||
do {
|
||||
/*
|
||||
* In the line below we are assuming that the pte can be read
|
||||
* atomically. If this is not the case for your architecture,
|
||||
* please wrap this in a helper function!
|
||||
*
|
||||
* for an example see gup_get_pte in arch/x86/mm/gup.c
|
||||
*/
|
||||
pte_t pte = READ_ONCE(*ptep);
|
||||
pte_t pte = gup_get_pte(ptep);
|
||||
struct page *head, *page;
|
||||
|
||||
/*
|
||||
* Similar to the PMD case below, NUMA hinting must take slow
|
||||
* path using the pte_protnone check.
|
||||
*/
|
||||
if (!pte_present(pte) || pte_special(pte) ||
|
||||
pte_protnone(pte) || (write && !pte_write(pte)))
|
||||
if (pte_protnone(pte))
|
||||
goto pte_unmap;
|
||||
|
||||
if (!arch_pte_access_permitted(pte, write))
|
||||
if (!pte_access_permitted(pte, write))
|
||||
goto pte_unmap;
|
||||
|
||||
if (pte_devmap(pte)) {
|
||||
pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
|
||||
if (unlikely(!pgmap)) {
|
||||
undo_dev_pagemap(nr, nr_start, pages);
|
||||
goto pte_unmap;
|
||||
}
|
||||
} else if (pte_special(pte))
|
||||
goto pte_unmap;
|
||||
|
||||
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
|
||||
|
@ -1232,6 +1255,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
|
|||
}
|
||||
|
||||
VM_BUG_ON_PAGE(compound_head(page) != head, page);
|
||||
|
||||
put_dev_pagemap(pgmap);
|
||||
SetPageReferenced(page);
|
||||
pages[*nr] = page;
|
||||
(*nr)++;
|
||||
|
||||
|
@ -1261,15 +1287,76 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
|
|||
}
|
||||
#endif /* __HAVE_ARCH_PTE_SPECIAL */
|
||||
|
||||
#ifdef __HAVE_ARCH_PTE_DEVMAP
|
||||
static int __gup_device_huge(unsigned long pfn, unsigned long addr,
|
||||
unsigned long end, struct page **pages, int *nr)
|
||||
{
|
||||
int nr_start = *nr;
|
||||
struct dev_pagemap *pgmap = NULL;
|
||||
|
||||
do {
|
||||
struct page *page = pfn_to_page(pfn);
|
||||
|
||||
pgmap = get_dev_pagemap(pfn, pgmap);
|
||||
if (unlikely(!pgmap)) {
|
||||
undo_dev_pagemap(nr, nr_start, pages);
|
||||
return 0;
|
||||
}
|
||||
SetPageReferenced(page);
|
||||
pages[*nr] = page;
|
||||
get_page(page);
|
||||
put_dev_pagemap(pgmap);
|
||||
(*nr)++;
|
||||
pfn++;
|
||||
} while (addr += PAGE_SIZE, addr != end);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Device-mapped PMD case: work out the pfn that corresponds to @addr inside
 * the huge mapping described by @pmd, then let __gup_device_huge() handle
 * [@addr, @end). The return value is passed through unchanged.
 */
static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
		unsigned long end, struct page **pages, int *nr)
{
	/* Byte offset of @addr within the PMD-sized mapping. */
	unsigned long offset = addr & ~PMD_MASK;
	unsigned long first_pfn = pmd_pfn(pmd) + (offset >> PAGE_SHIFT);

	return __gup_device_huge(first_pfn, addr, end, pages, nr);
}
|
||||
|
||||
/*
 * Device-mapped PUD case: work out the pfn that corresponds to @addr inside
 * the huge mapping described by @pud, then let __gup_device_huge() handle
 * [@addr, @end). The return value is passed through unchanged.
 */
static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
		unsigned long end, struct page **pages, int *nr)
{
	/* Byte offset of @addr within the PUD-sized mapping. */
	unsigned long offset = addr & ~PUD_MASK;
	unsigned long first_pfn = pud_pfn(pud) + (offset >> PAGE_SHIFT);

	return __gup_device_huge(first_pfn, addr, end, pages, nr);
}
|
||||
#else
|
||||
/*
 * Placeholder for configurations without __HAVE_ARCH_PTE_DEVMAP. It must
 * never actually be called: BUILD_BUG() is there to turn any call that
 * survives into a build failure. The return only satisfies the signature.
 */
static int __gup_device_huge_pmd(pmd_t pmd,
				 unsigned long addr, unsigned long end,
				 struct page **pages, int *nr)
{
	BUILD_BUG();
	return 0;
}
|
||||
|
||||
/*
 * Placeholder for configurations without __HAVE_ARCH_PTE_DEVMAP. It must
 * never actually be called: BUILD_BUG() is there to turn any call that
 * survives into a build failure. The return only satisfies the signature.
 */
static int __gup_device_huge_pud(pud_t pud,
				 unsigned long addr, unsigned long end,
				 struct page **pages, int *nr)
{
	BUILD_BUG();
	return 0;
}
|
||||
#endif
|
||||
|
||||
static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
|
||||
unsigned long end, int write, struct page **pages, int *nr)
|
||||
{
|
||||
struct page *head, *page;
|
||||
int refs;
|
||||
|
||||
if (write && !pmd_write(orig))
|
||||
if (!pmd_access_permitted(orig, write))
|
||||
return 0;
|
||||
|
||||
if (pmd_devmap(orig))
|
||||
return __gup_device_huge_pmd(orig, addr, end, pages, nr);
|
||||
|
||||
refs = 0;
|
||||
head = pmd_page(orig);
|
||||
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
|
||||
|
@ -1293,6 +1380,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
|
|||
return 0;
|
||||
}
|
||||
|
||||
SetPageReferenced(head);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -1302,9 +1390,12 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
|
|||
struct page *head, *page;
|
||||
int refs;
|
||||
|
||||
if (write && !pud_write(orig))
|
||||
if (!pud_access_permitted(orig, write))
|
||||
return 0;
|
||||
|
||||
if (pud_devmap(orig))
|
||||
return __gup_device_huge_pud(orig, addr, end, pages, nr);
|
||||
|
||||
refs = 0;
|
||||
head = pud_page(orig);
|
||||
page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
|
||||
|
@ -1328,6 +1419,7 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
|
|||
return 0;
|
||||
}
|
||||
|
||||
SetPageReferenced(head);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -1338,9 +1430,10 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
|
|||
int refs;
|
||||
struct page *head, *page;
|
||||
|
||||
if (write && !pgd_write(orig))
|
||||
if (!pgd_access_permitted(orig, write))
|
||||
return 0;
|
||||
|
||||
BUILD_BUG_ON(pgd_devmap(orig));
|
||||
refs = 0;
|
||||
head = pgd_page(orig);
|
||||
page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
|
||||
|
@ -1364,6 +1457,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
|
|||
return 0;
|
||||
}
|
||||
|
||||
SetPageReferenced(head);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -1520,6 +1614,21 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
|||
return nr;
|
||||
}
|
||||
|
||||
#ifndef gup_fast_permitted
|
||||
/*
|
||||
* Check if it's allowed to use __get_user_pages_fast() for the range, or
|
||||
* we need to fall back to the slow version:
|
||||
*/
|
||||
bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
|
||||
{
|
||||
unsigned long len, end;
|
||||
|
||||
len = (unsigned long) nr_pages << PAGE_SHIFT;
|
||||
end = start + len;
|
||||
return end >= start;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* get_user_pages_fast() - pin user pages in memory
|
||||
* @start: starting user address
|
||||
|
@ -1539,11 +1648,14 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
|||
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
||||
struct page **pages)
|
||||
{
|
||||
int nr, ret;
|
||||
int nr = 0, ret = 0;
|
||||
|
||||
start &= PAGE_MASK;
|
||||
nr = __get_user_pages_fast(start, nr_pages, write, pages);
|
||||
ret = nr;
|
||||
|
||||
if (gup_fast_permitted(start, nr_pages, write)) {
|
||||
nr = __get_user_pages_fast(start, nr_pages, write, pages);
|
||||
ret = nr;
|
||||
}
|
||||
|
||||
if (nr < nr_pages) {
|
||||
/* Try to get the remaining pages with get_user_pages */
|
||||
|
|
10
mm/swap.c
10
mm/swap.c
|
@ -97,6 +97,16 @@ static void __put_compound_page(struct page *page)
|
|||
|
||||
void __put_page(struct page *page)
|
||||
{
|
||||
if (is_zone_device_page(page)) {
|
||||
put_dev_pagemap(page->pgmap);
|
||||
|
||||
/*
|
||||
* The page belongs to the device that created pgmap. Do
|
||||
* not return it to page allocator.
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
if (unlikely(PageCompound(page)))
|
||||
__put_compound_page(page);
|
||||
else
|
||||
|
|
|
@ -409,6 +409,51 @@ static void *threadproc(void *ctx)
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef __i386__
|
||||
|
||||
/*
 * Fallback definition for C libraries whose headers do not expose
 * SA_RESTORER. The guard must test SA_RESTORER itself (not "SA_RESTORE"),
 * otherwise the fallback is emitted unconditionally and can clash with a
 * definition coming from the system headers.
 */
#ifndef SA_RESTORER
#define SA_RESTORER 0x04000000
#endif
|
||||
|
||||
/*
 * Private mirror of the structure handed to the raw rt_sigaction system
 * call. The UAPI header names it 'struct sigaction', which collides with
 * glibc's type of the same name, hence the separate definition here.
 */
struct fake_ksigaction {
	/* Handler slot; kept as an opaque pointer because the real type is nasty. */
	void *handler;
	/* SA_* flag bits. */
	unsigned long sa_flags;
	/* Restorer entry point. */
	void (*sa_restorer)(void);
	/* Raw signal mask, 8 bytes. */
	unsigned char sigset[8];
};
|
||||
|
||||
static void fix_sa_restorer(int sig)
|
||||
{
|
||||
struct fake_ksigaction ksa;
|
||||
|
||||
if (syscall(SYS_rt_sigaction, sig, NULL, &ksa, 8) == 0) {
|
||||
/*
|
||||
* glibc has a nasty bug: it sometimes writes garbage to
|
||||
* sa_restorer. This interacts quite badly with anything
|
||||
* that fiddles with SS because it can trigger legacy
|
||||
* stack switching. Patch it up. See:
|
||||
*
|
||||
* https://sourceware.org/bugzilla/show_bug.cgi?id=21269
|
||||
*/
|
||||
if (!(ksa.sa_flags & SA_RESTORER) && ksa.sa_restorer) {
|
||||
ksa.sa_restorer = NULL;
|
||||
if (syscall(SYS_rt_sigaction, sig, &ksa, NULL,
|
||||
sizeof(ksa.sigset)) != 0)
|
||||
err(1, "rt_sigaction");
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
/*
 * Non-i386 variant: 64-bit glibc works fine, so there is nothing to patch
 * up and @sig is deliberately ignored.
 */
static void fix_sa_restorer(int sig)
{
	(void)sig;
}
|
||||
#endif
|
||||
|
||||
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
|
||||
int flags)
|
||||
{
|
||||
|
@ -420,6 +465,7 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
|
|||
if (sigaction(sig, &sa, 0))
|
||||
err(1, "sigaction");
|
||||
|
||||
fix_sa_restorer(sig);
|
||||
}
|
||||
|
||||
static jmp_buf jmpbuf;
|
||||
|
|
|
@ -404,8 +404,6 @@ void handler(int signum, siginfo_t *si, void *vucontext)
|
|||
dprintf2("info->si_lower: %p\n", __si_bounds_lower(si));
|
||||
dprintf2("info->si_upper: %p\n", __si_bounds_upper(si));
|
||||
|
||||
check_siginfo_vs_shadow(si);
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
dprintf3("[%d]: %p\n", i, si_addr_ptr[i]);
|
||||
switch (br_reason) {
|
||||
|
@ -416,6 +414,9 @@ void handler(int signum, siginfo_t *si, void *vucontext)
|
|||
exit(5);
|
||||
case 1: /* #BR MPX bounds exception */
|
||||
/* these are normal and we expect to see them */
|
||||
|
||||
check_siginfo_vs_shadow(si);
|
||||
|
||||
dprintf1("bounds exception (normal): status 0x%jx at %p si_addr: %p\n",
|
||||
status, (void *)ip, si->si_addr);
|
||||
num_bnd_chk++;
|
||||
|
|
Loading…
Reference in a new issue