x86: clean up the page table dumper and add 32-bit support
Clean up the page table dumper (fix boundary conditions, use table-driven address ranges, and make some formatting changes, since it no longer writes to the kernel log but to a separate virtual file), and generalize it to 32 bits. [ mingo@elte.hu: x86: fix the pagetable dumper ] Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
This commit is contained in:
parent
926e5392ba
commit
fe770bf031
3 changed files with 179 additions and 126 deletions
|
@ -56,7 +56,7 @@ config DEBUG_PER_CPU_MAPS
|
|||
|
||||
config X86_PTDUMP
|
||||
bool "Export kernel pagetable layout to userspace via debugfs"
|
||||
depends on X86_64
|
||||
depends on DEBUG_KERNEL
|
||||
select DEBUG_FS
|
||||
help
|
||||
Say Y here if you want to show the kernel pagetable layout in a
|
||||
|
|
|
@ -3,6 +3,7 @@ obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o
|
|||
obj-$(CONFIG_X86_32) += pgtable_32.o
|
||||
|
||||
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
|
||||
obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
|
||||
|
||||
obj-$(CONFIG_HIGHMEM) += highmem_32.o
|
||||
|
||||
|
@ -12,5 +13,4 @@ else
|
|||
obj-$(CONFIG_NUMA) += numa_64.o
|
||||
obj-$(CONFIG_K8_NUMA) += k8topology_64.o
|
||||
obj-$(CONFIG_ACPI_NUMA) += srat_64.o
|
||||
obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
|
||||
endif
|
||||
|
|
|
@ -12,9 +12,10 @@
|
|||
* of the License.
|
||||
*/
|
||||
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/debugfs.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
|
@ -28,73 +29,107 @@ struct pg_state {
|
|||
pgprot_t current_prot;
|
||||
unsigned long start_address;
|
||||
unsigned long current_address;
|
||||
int printed_vmalloc;
|
||||
int printed_modules;
|
||||
int printed_vmemmap;
|
||||
int printed_highmap;
|
||||
const struct addr_marker *marker;
|
||||
};
|
||||
|
||||
struct addr_marker {
|
||||
unsigned long start_address;
|
||||
const char *name;
|
||||
};
|
||||
|
||||
/* Address space marker hints */
|
||||
static struct addr_marker address_markers[] = {
|
||||
{ 0, "User Space" },
|
||||
#ifdef CONFIG_X86_64
|
||||
{ 0x8000000000000000UL, "Kernel Space" },
|
||||
{ 0xffff810000000000UL, "Low Kernel Mapping" },
|
||||
{ VMALLOC_START, "vmalloc() Area" },
|
||||
{ MODULES_VADDR, "Modules" },
|
||||
{ MODULES_END, "End Modules" },
|
||||
{ VMEMMAP_START, "Vmemmap" },
|
||||
{ __START_KERNEL_map, "High Kernel Mapping" },
|
||||
#else
|
||||
{ PAGE_OFFSET, "Kernel Mapping" },
|
||||
{ 0/* VMALLOC_START */, "vmalloc() Area" },
|
||||
{ 0/*VMALLOC_END*/, "vmalloc() End" },
|
||||
# ifdef CONFIG_HIGHMEM
|
||||
{ 0/*PKMAP_BASE*/, "Persisent kmap() Area" },
|
||||
# endif
|
||||
{ 0/*FIXADDR_START*/, "Fixmap Area" },
|
||||
#endif
|
||||
{ -1, NULL } /* End of list */
|
||||
};
|
||||
|
||||
/* Multipliers for offsets within the PTEs */
|
||||
#define LEVEL_4_MULT (PAGE_SIZE)
|
||||
#define LEVEL_3_MULT (512UL * LEVEL_4_MULT)
|
||||
#define LEVEL_2_MULT (512UL * LEVEL_3_MULT)
|
||||
#define LEVEL_1_MULT (512UL * LEVEL_2_MULT)
|
||||
|
||||
#define PTE_LEVEL_MULT (PAGE_SIZE)
|
||||
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
|
||||
#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
|
||||
#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
|
||||
|
||||
/*
|
||||
* Print a readable form of a pgprot_t to the seq_file
|
||||
*/
|
||||
static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
|
||||
{
|
||||
unsigned long pr = pgprot_val(prot);
|
||||
pgprotval_t pr = pgprot_val(prot);
|
||||
static const char * const level_name[] =
|
||||
{ "cr3", "pgd", "pud", "pmd", "pte" };
|
||||
|
||||
if (pr & _PAGE_USER)
|
||||
seq_printf(m, "USR ");
|
||||
else
|
||||
seq_printf(m, " ");
|
||||
if (pr & _PAGE_RW)
|
||||
seq_printf(m, "RW ");
|
||||
else
|
||||
seq_printf(m, "ro ");
|
||||
if (pr & _PAGE_PWT)
|
||||
seq_printf(m, "PWT ");
|
||||
else
|
||||
seq_printf(m, " ");
|
||||
if (pr & _PAGE_PCD)
|
||||
seq_printf(m, "PCD ");
|
||||
else
|
||||
seq_printf(m, " ");
|
||||
|
||||
/* Bit 9 has a different meaning on level 3 vs 4 */
|
||||
if (level <= 3) {
|
||||
if (pr & _PAGE_PSE)
|
||||
seq_printf(m, "PSE ");
|
||||
else
|
||||
seq_printf(m, " ");
|
||||
if (!pgprot_val(prot)) {
|
||||
/* Not present */
|
||||
seq_printf(m, " ");
|
||||
} else {
|
||||
if (pr & _PAGE_PAT)
|
||||
seq_printf(m, "pat ");
|
||||
if (pr & _PAGE_USER)
|
||||
seq_printf(m, "USR ");
|
||||
else
|
||||
seq_printf(m, " ");
|
||||
if (pr & _PAGE_RW)
|
||||
seq_printf(m, "RW ");
|
||||
else
|
||||
seq_printf(m, "ro ");
|
||||
if (pr & _PAGE_PWT)
|
||||
seq_printf(m, "PWT ");
|
||||
else
|
||||
seq_printf(m, " ");
|
||||
if (pr & _PAGE_PCD)
|
||||
seq_printf(m, "PCD ");
|
||||
else
|
||||
seq_printf(m, " ");
|
||||
|
||||
/* Bit 9 has a different meaning on level 3 vs 4 */
|
||||
if (level <= 3) {
|
||||
if (pr & _PAGE_PSE)
|
||||
seq_printf(m, "PSE ");
|
||||
else
|
||||
seq_printf(m, " ");
|
||||
} else {
|
||||
if (pr & _PAGE_PAT)
|
||||
seq_printf(m, "pat ");
|
||||
else
|
||||
seq_printf(m, " ");
|
||||
}
|
||||
if (pr & _PAGE_GLOBAL)
|
||||
seq_printf(m, "GLB ");
|
||||
else
|
||||
seq_printf(m, " ");
|
||||
if (pr & _PAGE_NX)
|
||||
seq_printf(m, "NX ");
|
||||
else
|
||||
seq_printf(m, "x ");
|
||||
}
|
||||
if (pr & _PAGE_GLOBAL)
|
||||
seq_printf(m, "GLB ");
|
||||
else
|
||||
seq_printf(m, " ");
|
||||
if (pr & _PAGE_NX)
|
||||
seq_printf(m, "NX ");
|
||||
else
|
||||
seq_printf(m, "x ");
|
||||
seq_printf(m, "%s\n", level_name[level]);
|
||||
}
|
||||
|
||||
/*
|
||||
* Sign-extend the 48 bit address to 64 bit
|
||||
* On 64 bits, sign-extend the 48 bit address to 64 bit
|
||||
*/
|
||||
static unsigned long sign_extend(unsigned long u)
|
||||
static unsigned long normalize_addr(unsigned long u)
|
||||
{
|
||||
if (u>>47)
|
||||
u = u | (0xffffUL << 48);
|
||||
#ifdef CONFIG_X86_64
|
||||
return (signed long)(u << 16) >> 16;
|
||||
#else
|
||||
return u;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -103,81 +138,62 @@ static unsigned long sign_extend(unsigned long u)
|
|||
* print what we collected so far.
|
||||
*/
|
||||
static void note_page(struct seq_file *m, struct pg_state *st,
|
||||
pgprot_t new_prot, int level)
|
||||
pgprot_t new_prot, int level)
|
||||
{
|
||||
unsigned long prot, cur;
|
||||
pgprotval_t prot, cur;
|
||||
static const char units[] = "KMGTPE";
|
||||
|
||||
/*
|
||||
* If we have a "break" in the series, we need to flush the state that
|
||||
* we have now. "break" is either changing perms or a different level.
|
||||
* we have now. "break" is either changing perms, levels or
|
||||
* address space marker.
|
||||
*/
|
||||
prot = pgprot_val(new_prot) & ~(PTE_MASK);
|
||||
cur = pgprot_val(st->current_prot) & ~(PTE_MASK);
|
||||
|
||||
if ((prot != cur || level != st->level) &&
|
||||
st->current_address != st->start_address) {
|
||||
char unit = 'K';
|
||||
if (!st->level) {
|
||||
/* First entry */
|
||||
st->current_prot = new_prot;
|
||||
st->level = level;
|
||||
st->marker = address_markers;
|
||||
seq_printf(m, "---[ %s ]---\n", st->marker->name);
|
||||
} else if (prot != cur || level != st->level ||
|
||||
st->current_address >= st->marker[1].start_address) {
|
||||
const char *unit = units;
|
||||
unsigned long delta;
|
||||
|
||||
/*
|
||||
* Now print the actual finished series
|
||||
*/
|
||||
seq_printf(m, "0x%p-0x%p ",
|
||||
(void *)st->start_address,
|
||||
(void *)st->current_address);
|
||||
|
||||
delta = (st->current_address - st->start_address) >> 10;
|
||||
while (!(delta & 1023) && unit[1]) {
|
||||
delta >>= 10;
|
||||
unit++;
|
||||
}
|
||||
seq_printf(m, "%9lu%c ", delta, *unit);
|
||||
printk_prot(m, st->current_prot, st->level);
|
||||
|
||||
/*
|
||||
* We print markers for special areas of address space,
|
||||
* such as the start of vmalloc space etc.
|
||||
* This helps in the interpretation.
|
||||
*/
|
||||
if (!st->printed_vmalloc &&
|
||||
st->start_address >= VMALLOC_START) {
|
||||
seq_printf(m, "---[ VMALLOC SPACE ]---\n");
|
||||
st->printed_vmalloc = 1;
|
||||
}
|
||||
if (!st->printed_modules &&
|
||||
st->start_address >= MODULES_VADDR) {
|
||||
seq_printf(m, "---[ MODULES SPACE ]---\n");
|
||||
st->printed_modules = 1;
|
||||
}
|
||||
if (st->printed_modules < 2 &&
|
||||
st->start_address >= MODULES_END) {
|
||||
seq_printf(m, "---[ END MODULES SPACE ]---\n");
|
||||
st->printed_modules = 2;
|
||||
}
|
||||
if (!st->printed_vmemmap &&
|
||||
st->start_address >= VMEMMAP_START) {
|
||||
seq_printf(m, "---[ VMMEMMAP SPACE ]---\n");
|
||||
st->printed_vmemmap = 1;
|
||||
}
|
||||
if (!st->printed_highmap &&
|
||||
st->start_address >= __START_KERNEL_map) {
|
||||
seq_printf(m, "---[ HIGH KERNEL MAPPING ]---\n");
|
||||
st->printed_highmap = 1;
|
||||
if (st->current_address >= st->marker[1].start_address) {
|
||||
st->marker++;
|
||||
seq_printf(m, "---[ %s ]---\n", st->marker->name);
|
||||
}
|
||||
|
||||
/*
|
||||
* Now print the actual finished series
|
||||
*/
|
||||
seq_printf(m, "[ %016lx - %016lx ",
|
||||
st->start_address, st->current_address);
|
||||
|
||||
delta = (st->current_address - st->start_address) >> 10;
|
||||
if ((delta & 1023) == 0) {
|
||||
delta = delta >> 10;
|
||||
unit = 'M';
|
||||
}
|
||||
if (pgprot_val(st->current_prot)) {
|
||||
seq_printf(m, "Size %9lu%cb ", delta, unit);
|
||||
printk_prot(m, st->current_prot, st->level);
|
||||
seq_printf(m, "L%i]\n", st->level);
|
||||
} else {
|
||||
/* don't print protections on non-present memory */
|
||||
seq_printf(m, "%14lu%cb", delta, unit);
|
||||
seq_printf(m, " L%i]\n",
|
||||
st->level);
|
||||
}
|
||||
st->start_address = st->current_address;
|
||||
st->current_prot = new_prot;
|
||||
st->level = level;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
static void walk_level_4(struct seq_file *m, struct pg_state *st, pmd_t addr,
|
||||
static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
|
||||
unsigned long P)
|
||||
{
|
||||
int i;
|
||||
|
@ -187,14 +203,15 @@ static void walk_level_4(struct seq_file *m, struct pg_state *st, pmd_t addr,
|
|||
for (i = 0; i < PTRS_PER_PTE; i++) {
|
||||
pgprot_t prot = pte_pgprot(*start);
|
||||
|
||||
st->current_address = sign_extend(P + i * LEVEL_4_MULT);
|
||||
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
|
||||
note_page(m, st, prot, 4);
|
||||
start++;
|
||||
}
|
||||
}
|
||||
|
||||
#if PTRS_PER_PMD > 1
|
||||
|
||||
static void walk_level_3(struct seq_file *m, struct pg_state *st, pud_t addr,
|
||||
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
|
||||
unsigned long P)
|
||||
{
|
||||
int i;
|
||||
|
@ -202,25 +219,30 @@ static void walk_level_3(struct seq_file *m, struct pg_state *st, pud_t addr,
|
|||
|
||||
start = (pmd_t *) pud_page_vaddr(addr);
|
||||
for (i = 0; i < PTRS_PER_PMD; i++) {
|
||||
st->current_address = sign_extend(P + i * LEVEL_3_MULT);
|
||||
st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
|
||||
if (!pmd_none(*start)) {
|
||||
unsigned long prot;
|
||||
pgprotval_t prot = pmd_val(*start) & ~PTE_MASK;
|
||||
|
||||
prot = pmd_val(*start) & ~(PTE_MASK);
|
||||
/* Deal with 2Mb pages */
|
||||
if (pmd_large(*start))
|
||||
if (pmd_large(*start) || !pmd_present(*start))
|
||||
note_page(m, st, __pgprot(prot), 3);
|
||||
else
|
||||
walk_level_4(m, st, *start,
|
||||
P + i * LEVEL_3_MULT);
|
||||
walk_pte_level(m, st, *start,
|
||||
P + i * PMD_LEVEL_MULT);
|
||||
} else
|
||||
note_page(m, st, __pgprot(0), 3);
|
||||
start++;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
|
||||
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
|
||||
#define pud_none(a) pmd_none(__pmd(pud_val(a)))
|
||||
#endif
|
||||
|
||||
static void walk_level_2(struct seq_file *m, struct pg_state *st, pgd_t addr,
|
||||
#if PTRS_PER_PUD > 1
|
||||
|
||||
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
|
||||
unsigned long P)
|
||||
{
|
||||
int i;
|
||||
|
@ -229,16 +251,15 @@ static void walk_level_2(struct seq_file *m, struct pg_state *st, pgd_t addr,
|
|||
start = (pud_t *) pgd_page_vaddr(addr);
|
||||
|
||||
for (i = 0; i < PTRS_PER_PUD; i++) {
|
||||
st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
|
||||
if (!pud_none(*start)) {
|
||||
unsigned long prot;
|
||||
pgprotval_t prot = pud_val(*start) & ~PTE_MASK;
|
||||
|
||||
prot = pud_val(*start) & ~(PTE_MASK);
|
||||
/* Deal with 1Gb pages */
|
||||
if (pud_large(*start))
|
||||
if (pud_large(*start) || !pud_present(*start))
|
||||
note_page(m, st, __pgprot(prot), 2);
|
||||
else
|
||||
walk_level_3(m, st, *start,
|
||||
P + i * LEVEL_2_MULT);
|
||||
walk_pmd_level(m, st, *start,
|
||||
P + i * PUD_LEVEL_MULT);
|
||||
} else
|
||||
note_page(m, st, __pgprot(0), 2);
|
||||
|
||||
|
@ -246,28 +267,48 @@ static void walk_level_2(struct seq_file *m, struct pg_state *st, pgd_t addr,
|
|||
}
|
||||
}
|
||||
|
||||
static void walk_level_1(struct seq_file *m)
|
||||
#else
|
||||
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
|
||||
#define pgd_large(a) pud_large(__pud(pgd_val(a)))
|
||||
#define pgd_none(a) pud_none(__pud(pgd_val(a)))
|
||||
#endif
|
||||
|
||||
static void walk_pgd_level(struct seq_file *m)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
pgd_t *start = (pgd_t *) &init_level4_pgt;
|
||||
#else
|
||||
pgd_t *start = swapper_pg_dir;
|
||||
#endif
|
||||
int i;
|
||||
struct pg_state st;
|
||||
|
||||
memset(&st, 0, sizeof(st));
|
||||
st.level = 1;
|
||||
|
||||
for (i = 0; i < PTRS_PER_PGD; i++) {
|
||||
if (!pgd_none(*start))
|
||||
walk_level_2(m, &st, *start, i * LEVEL_1_MULT);
|
||||
else
|
||||
st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
|
||||
if (!pgd_none(*start)) {
|
||||
pgprotval_t prot = pgd_val(*start) & ~PTE_MASK;
|
||||
|
||||
if (pgd_large(*start) || !pgd_present(*start))
|
||||
note_page(m, &st, __pgprot(prot), 1);
|
||||
else
|
||||
walk_pud_level(m, &st, *start,
|
||||
i * PGD_LEVEL_MULT);
|
||||
} else
|
||||
note_page(m, &st, __pgprot(0), 1);
|
||||
|
||||
start++;
|
||||
}
|
||||
|
||||
/* Flush out the last page */
|
||||
st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
|
||||
note_page(m, &st, __pgprot(0), 0);
|
||||
}
|
||||
|
||||
static int ptdump_show(struct seq_file *m, void *v)
|
||||
{
|
||||
seq_puts(m, "Kernel pagetable dump\n");
|
||||
walk_level_1(m);
|
||||
walk_pgd_level(m);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -287,6 +328,18 @@ int pt_dump_init(void)
|
|||
{
|
||||
struct dentry *pe;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/* Not a compile-time constant on x86-32 */
|
||||
address_markers[2].start_address = VMALLOC_START;
|
||||
address_markers[3].start_address = VMALLOC_END;
|
||||
# ifdef CONFIG_HIGHMEM
|
||||
address_markers[4].start_address = PKMAP_BASE;
|
||||
address_markers[5].start_address = FIXADDR_START;
|
||||
# else
|
||||
address_markers[4].start_address = FIXADDR_START;
|
||||
# endif
|
||||
#endif
|
||||
|
||||
pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
|
||||
&ptdump_fops);
|
||||
if (!pe)
|
||||
|
|
Loading…
Reference in a new issue