x86: clean up the page table dumper and add 32-bit support

Clean up the page table dumper: fix the boundary conditions, make the
address ranges table-driven, and adjust the output formatting now that
the dump is written to a separate virtual file rather than to the
kernel log. Also generalize the dumper to 32 bits.

[ mingo@elte.hu: x86: fix the pagetable dumper ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index cb7002e..7ce8e70 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -56,7 +56,7 @@
 
 config X86_PTDUMP
 	bool "Export kernel pagetable layout to userspace via debugfs"
-	depends on X86_64
+	depends on DEBUG_KERNEL
 	select DEBUG_FS
 	help
 	  Say Y here if you want to show the kernel pagetable layout in a
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 28632f4..9ab9889 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -3,6 +3,7 @@
 obj-$(CONFIG_X86_32)		+= pgtable_32.o
 
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
+obj-$(CONFIG_X86_PTDUMP)	+= dump_pagetables.o
 
 obj-$(CONFIG_HIGHMEM)		+= highmem_32.o
 
@@ -12,5 +13,4 @@
 obj-$(CONFIG_NUMA)		+= numa_64.o
 obj-$(CONFIG_K8_NUMA)		+= k8topology_64.o
 obj-$(CONFIG_ACPI_NUMA)		+= srat_64.o
-obj-$(CONFIG_X86_PTDUMP)	+= dump_pagetables.o
 endif
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 5e7f643..6d84033 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -12,9 +12,10 @@
  * of the License.
  */
 
+#include <linux/debugfs.h>
+#include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/seq_file.h>
-#include <linux/debugfs.h>
 
 #include <asm/pgtable.h>
 
@@ -28,73 +29,107 @@
 	pgprot_t current_prot;
 	unsigned long start_address;
 	unsigned long current_address;
-	int printed_vmalloc;
-	int printed_modules;
-	int printed_vmemmap;
-	int printed_highmap;
+	const struct addr_marker *marker;
+};
+
+struct addr_marker {
+	unsigned long start_address;
+	const char *name;
+};
+
+/* Address space marker hints */
+static struct addr_marker address_markers[] = {
+	{ 0, "User Space" },
+#ifdef CONFIG_X86_64
+	{ 0x8000000000000000UL, "Kernel Space" },
+	{ 0xffff810000000000UL, "Low Kernel Mapping" },
+	{ VMALLOC_START,        "vmalloc() Area" },
+	{ MODULES_VADDR,        "Modules" },
+	{ MODULES_END,          "End Modules" },
+	{ VMEMMAP_START,        "Vmemmap" },
+	{ __START_KERNEL_map,   "High Kernel Mapping" },
+#else
+	{ PAGE_OFFSET,          "Kernel Mapping" },
+	{ 0/* VMALLOC_START */, "vmalloc() Area" },
+	{ 0/*VMALLOC_END*/,     "vmalloc() End" },
+# ifdef CONFIG_HIGHMEM
+	{ 0/*PKMAP_BASE*/,      "Persisent kmap() Area" },
+# endif
+	{ 0/*FIXADDR_START*/,   "Fixmap Area" },
+#endif
+	{ -1, NULL }		/* End of list */
 };
 
 /* Multipliers for offsets within the PTEs */
-#define LEVEL_4_MULT (PAGE_SIZE)
-#define LEVEL_3_MULT (512UL * LEVEL_4_MULT)
-#define LEVEL_2_MULT (512UL * LEVEL_3_MULT)
-#define LEVEL_1_MULT (512UL * LEVEL_2_MULT)
-
+#define PTE_LEVEL_MULT (PAGE_SIZE)
+#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
+#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
+#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
 
 /*
  * Print a readable form of a pgprot_t to the seq_file
  */
 static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
 {
-	unsigned long pr = pgprot_val(prot);
+	pgprotval_t pr = pgprot_val(prot);
+	static const char * const level_name[] =
+		{ "cr3", "pgd", "pud", "pmd", "pte" };
 
-	if (pr & _PAGE_USER)
-		seq_printf(m, "USR ");
-	else
-		seq_printf(m, "    ");
-	if (pr & _PAGE_RW)
-		seq_printf(m, "RW ");
-	else
-		seq_printf(m, "ro ");
-	if (pr & _PAGE_PWT)
-		seq_printf(m, "PWT ");
-	else
-		seq_printf(m, "    ");
-	if (pr & _PAGE_PCD)
-		seq_printf(m, "PCD ");
-	else
-		seq_printf(m, "    ");
-
-	/* Bit 9 has a different meaning on level 3 vs 4 */
-	if (level <= 3) {
-		if (pr & _PAGE_PSE)
-			seq_printf(m, "PSE ");
-		else
-			seq_printf(m, "    ");
+	if (!pgprot_val(prot)) {
+		/* Not present */
+		seq_printf(m, "                          ");
 	} else {
-		if (pr & _PAGE_PAT)
-			seq_printf(m, "pat ");
+		if (pr & _PAGE_USER)
+			seq_printf(m, "USR ");
 		else
 			seq_printf(m, "    ");
+		if (pr & _PAGE_RW)
+			seq_printf(m, "RW ");
+		else
+			seq_printf(m, "ro ");
+		if (pr & _PAGE_PWT)
+			seq_printf(m, "PWT ");
+		else
+			seq_printf(m, "    ");
+		if (pr & _PAGE_PCD)
+			seq_printf(m, "PCD ");
+		else
+			seq_printf(m, "    ");
+
+		/* Bit 7 has a different meaning on level 3 vs 4 */
+		if (level <= 3) {
+			if (pr & _PAGE_PSE)
+				seq_printf(m, "PSE ");
+			else
+				seq_printf(m, "    ");
+		} else {
+			if (pr & _PAGE_PAT)
+				seq_printf(m, "pat ");
+			else
+				seq_printf(m, "    ");
+		}
+		if (pr & _PAGE_GLOBAL)
+			seq_printf(m, "GLB ");
+		else
+			seq_printf(m, "    ");
+		if (pr & _PAGE_NX)
+			seq_printf(m, "NX ");
+		else
+			seq_printf(m, "x  ");
 	}
-	if (pr & _PAGE_GLOBAL)
-		seq_printf(m, "GLB ");
-	else
-		seq_printf(m, "    ");
-	if (pr & _PAGE_NX)
-		seq_printf(m, "NX ");
-	else
-		seq_printf(m, "x  ");
+	seq_printf(m, "%s\n", level_name[level]);
 }
 
 /*
- * Sign-extend the 48 bit address to 64 bit
+ * On 64 bits, sign-extend the 48 bit address to 64 bit
  */
-static unsigned long sign_extend(unsigned long u)
+static unsigned long normalize_addr(unsigned long u)
 {
-	if (u>>47)
-		u = u | (0xffffUL << 48);
+#ifdef CONFIG_X86_64
+	return (signed long)(u << 16) >> 16;
+#else
 	return u;
+#endif
 }
 
 /*
@@ -103,81 +138,62 @@
  * print what we collected so far.
  */
 static void note_page(struct seq_file *m, struct pg_state *st,
-					pgprot_t new_prot, int level)
+		      pgprot_t new_prot, int level)
 {
-	unsigned long prot, cur;
+	pgprotval_t prot, cur;
+	static const char units[] = "KMGTPE";
 
 	/*
 	 * If we have a "break" in the series, we need to flush the state that
-	 * we have now. "break" is either changing perms or a different level.
+	 * we have now. "break" is either changing perms, levels or
+	 * address space marker.
 	 */
 	prot = pgprot_val(new_prot) & ~(PTE_MASK);
 	cur = pgprot_val(st->current_prot) & ~(PTE_MASK);
 
-	if ((prot != cur || level != st->level) &&
-				st->current_address != st->start_address) {
-		char unit = 'K';
+	if (!st->level) {
+		/* First entry */
+		st->current_prot = new_prot;
+		st->level = level;
+		st->marker = address_markers;
+		seq_printf(m, "---[ %s ]---\n", st->marker->name);
+	} else if (prot != cur || level != st->level ||
+		   st->current_address >= st->marker[1].start_address) {
+		const char *unit = units;
 		unsigned long delta;
 
 		/*
+		 * Now print the actual finished series
+		 */
+		seq_printf(m, "0x%p-0x%p   ",
+			   (void *)st->start_address,
+			   (void *)st->current_address);
+
+		delta = (st->current_address - st->start_address) >> 10;
+		while (!(delta & 1023) && unit[1]) {
+			delta >>= 10;
+			unit++;
+		}
+		seq_printf(m, "%9lu%c ", delta, *unit);
+		printk_prot(m, st->current_prot, st->level);
+
+		/*
 		 * We print markers for special areas of address space,
 		 * such as the start of vmalloc space etc.
 		 * This helps in the interpretation.
 		 */
-		if (!st->printed_vmalloc &&
-				st->start_address >= VMALLOC_START) {
-			seq_printf(m, "---[ VMALLOC SPACE ]---\n");
-			st->printed_vmalloc = 1;
-		}
-		if (!st->printed_modules &&
-				st->start_address >= MODULES_VADDR) {
-			seq_printf(m, "---[ MODULES SPACE ]---\n");
-			st->printed_modules = 1;
-		}
-		if (st->printed_modules < 2 &&
-				st->start_address >= MODULES_END) {
-			seq_printf(m, "---[ END MODULES SPACE ]---\n");
-			st->printed_modules = 2;
-		}
-		if (!st->printed_vmemmap &&
-				st->start_address >= VMEMMAP_START) {
-			seq_printf(m, "---[ VMMEMMAP SPACE ]---\n");
-			st->printed_vmemmap = 1;
-		}
-		if (!st->printed_highmap &&
-				st->start_address >= __START_KERNEL_map) {
-			seq_printf(m, "---[ HIGH KERNEL MAPPING ]---\n");
-			st->printed_highmap = 1;
+		if (st->current_address >= st->marker[1].start_address) {
+			st->marker++;
+			seq_printf(m, "---[ %s ]---\n", st->marker->name);
 		}
 
-		/*
-		 * Now print the actual finished series
-		 */
-		seq_printf(m, "[ %016lx -  %016lx   ",
-				st->start_address, st->current_address);
-
-		delta = (st->current_address - st->start_address) >> 10;
-		if ((delta & 1023) == 0) {
-			delta = delta >> 10;
-			unit = 'M';
-		}
-		if (pgprot_val(st->current_prot)) {
-			seq_printf(m, "Size %9lu%cb ", delta, unit);
-			printk_prot(m, st->current_prot, st->level);
-			seq_printf(m, "L%i]\n", st->level);
-		} else {
-			/* don't print protections on non-present memory */
-			seq_printf(m, "%14lu%cb", delta, unit);
-			seq_printf(m, "                           L%i]\n",
-					st->level);
-		}
 		st->start_address = st->current_address;
 		st->current_prot = new_prot;
 		st->level = level;
-	};
+	}
 }
 
-static void walk_level_4(struct seq_file *m, struct pg_state *st, pmd_t addr,
+static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
 							unsigned long P)
 {
 	int i;
@@ -187,14 +203,15 @@
 	for (i = 0; i < PTRS_PER_PTE; i++) {
 		pgprot_t prot = pte_pgprot(*start);
 
-		st->current_address = sign_extend(P + i * LEVEL_4_MULT);
+		st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
 		note_page(m, st, prot, 4);
 		start++;
 	}
 }
 
+#if PTRS_PER_PMD > 1
 
-static void walk_level_3(struct seq_file *m, struct pg_state *st, pud_t addr,
+static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
 							unsigned long P)
 {
 	int i;
@@ -202,25 +219,30 @@
 
 	start = (pmd_t *) pud_page_vaddr(addr);
 	for (i = 0; i < PTRS_PER_PMD; i++) {
-		st->current_address = sign_extend(P + i * LEVEL_3_MULT);
+		st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
 		if (!pmd_none(*start)) {
-			unsigned long prot;
+			pgprotval_t prot = pmd_val(*start) & ~PTE_MASK;
 
-			prot = pmd_val(*start) & ~(PTE_MASK);
-			/* Deal with 2Mb pages */
-			if (pmd_large(*start))
+			if (pmd_large(*start) || !pmd_present(*start))
 				note_page(m, st, __pgprot(prot), 3);
 			else
-				walk_level_4(m, st, *start,
-							P + i * LEVEL_3_MULT);
+				walk_pte_level(m, st, *start,
+					       P + i * PMD_LEVEL_MULT);
 		} else
 			note_page(m, st, __pgprot(0), 3);
 		start++;
 	}
 }
 
+#else
+#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
+#define pud_large(a) pmd_large(__pmd(pud_val(a)))
+#define pud_none(a)  pmd_none(__pmd(pud_val(a)))
+#endif
 
-static void walk_level_2(struct seq_file *m, struct pg_state *st, pgd_t addr,
+#if PTRS_PER_PUD > 1
+
+static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
 							unsigned long P)
 {
 	int i;
@@ -229,16 +251,15 @@
 	start = (pud_t *) pgd_page_vaddr(addr);
 
 	for (i = 0; i < PTRS_PER_PUD; i++) {
+		st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
 		if (!pud_none(*start)) {
-			unsigned long prot;
+			pgprotval_t prot = pud_val(*start) & ~PTE_MASK;
 
-			prot = pud_val(*start) & ~(PTE_MASK);
-			/* Deal with 1Gb pages */
-			if (pud_large(*start))
+			if (pud_large(*start) || !pud_present(*start))
 				note_page(m, st, __pgprot(prot), 2);
 			else
-				walk_level_3(m, st, *start,
-					P + i * LEVEL_2_MULT);
+				walk_pmd_level(m, st, *start,
+					       P + i * PUD_LEVEL_MULT);
 		} else
 			note_page(m, st, __pgprot(0), 2);
 
@@ -246,28 +267,48 @@
 	}
 }
 
-static void walk_level_1(struct seq_file *m)
+#else
+#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
+#define pgd_large(a) pud_large(__pud(pgd_val(a)))
+#define pgd_none(a)  pud_none(__pud(pgd_val(a)))
+#endif
+
+static void walk_pgd_level(struct seq_file *m)
 {
+#ifdef CONFIG_X86_64
 	pgd_t *start = (pgd_t *) &init_level4_pgt;
+#else
+	pgd_t *start = swapper_pg_dir;
+#endif
 	int i;
 	struct pg_state st;
 
 	memset(&st, 0, sizeof(st));
-	st.level = 1;
 
 	for (i = 0; i < PTRS_PER_PGD; i++) {
-		if (!pgd_none(*start))
-			walk_level_2(m, &st, *start, i * LEVEL_1_MULT);
-		else
+		st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
+		if (!pgd_none(*start)) {
+			pgprotval_t prot = pgd_val(*start) & ~PTE_MASK;
+
+			if (pgd_large(*start) || !pgd_present(*start))
+				note_page(m, &st, __pgprot(prot), 1);
+			else
+				walk_pud_level(m, &st, *start,
+					       i * PGD_LEVEL_MULT);
+		} else
 			note_page(m, &st, __pgprot(0), 1);
+
 		start++;
 	}
+
+	/* Flush out the last page */
+	st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
+	note_page(m, &st, __pgprot(0), 0);
 }
 
 static int ptdump_show(struct seq_file *m, void *v)
 {
-	seq_puts(m, "Kernel pagetable dump\n");
-	walk_level_1(m);
+	walk_pgd_level(m);
 	return 0;
 }
 
@@ -287,6 +328,18 @@
 {
 	struct dentry *pe;
 
+#ifdef CONFIG_X86_32
+	/* Not a compile-time constant on x86-32 */
+	address_markers[2].start_address = VMALLOC_START;
+	address_markers[3].start_address = VMALLOC_END;
+# ifdef CONFIG_HIGHMEM
+	address_markers[4].start_address = PKMAP_BASE;
+	address_markers[5].start_address = FIXADDR_START;
+# else
+	address_markers[4].start_address = FIXADDR_START;
+# endif
+#endif
+
 	pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
 				 &ptdump_fops);
 	if (!pe)