[ARM] Introduce new PTE memory type bits

Provide L_PTE_MT_xxx definitions to describe the memory types that we
use in Linux/ARM.  These definitions are carefully picked such that:

1. their LSBs match what is required for pre-ARMv6 CPUs.
2. they all have a unique encoding, including after modification
   by build_mem_type_table() (with the result that some memory types
   end up with more than one valid encoding, e.g. L_PTE_MT_DEV_WC
   and L_PTE_MT_DEV_WC2.)
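
The memory type lives in pte bits 5:2 (L_PTE_MT_MASK, i.e. 0x3c), and
the two LSBs of each encoding are the pre-ARMv6 C and B bits, so e.g.
L_PTE_MT_WRITEBACK (0011) gives C=1 B=1 on those CPUs.  A minimal
sketch of the intended usage pattern (this mirrors the
pgprot_writecombine() and fault-armv.c changes below; `entry' is just
an illustrative pte):

	pte_val(entry) &= ~L_PTE_MT_MASK;	/* drop the old memory type */
	pte_val(entry) |= L_PTE_MT_WRITETHROUGH;	/* install the new one */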

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
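---
Illustrative note (not part of the commit itself): with these
definitions, pgprot_writecombine() becomes a pure replacement of the
memory type field rather than a clearing of individual C/B bits, so a
hypothetical driver fragment such as

	pgprot_t prot = pgprot_writecombine(pgprot_kernel);

yields a prot value whose L_PTE_MT_MASK field is exactly
L_PTE_MT_BUFFERABLE, instead of the old behaviour of merely clearing
the C bit.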
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 5c75e02..8df2e25 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -164,14 +164,35 @@
 #define L_PTE_PRESENT		(1 << 0)
 #define L_PTE_FILE		(1 << 1)	/* only when !PRESENT */
 #define L_PTE_YOUNG		(1 << 1)
-#define L_PTE_BUFFERABLE	(1 << 2)	/* matches PTE */
-#define L_PTE_CACHEABLE		(1 << 3)	/* matches PTE */
+#define L_PTE_BUFFERABLE	(1 << 2)	/* obsolete, matches PTE */
+#define L_PTE_CACHEABLE		(1 << 3)	/* obsolete, matches PTE */
 #define L_PTE_DIRTY		(1 << 6)
 #define L_PTE_WRITE		(1 << 7)
 #define L_PTE_USER		(1 << 8)
 #define L_PTE_EXEC		(1 << 9)
 #define L_PTE_SHARED		(1 << 10)	/* shared(v6), coherent(xsc3) */
 
+/*
+ * These are the memory types, defined to be compatible with
+ * pre-ARMv6 CPUs' cacheable and bufferable bits: XXCB
+ * (note: build_mem_type_table() modifies these encodings to
+ * work with the existing proc-*.S setup.)
+ */
+#define L_PTE_MT_UNCACHED	(0x00 << 2)	/* 0000 */
+#define L_PTE_MT_BUFFERABLE	(0x01 << 2)	/* 0001 */
+#define L_PTE_MT_WRITETHROUGH	(0x02 << 2)	/* 0010 */
+#define L_PTE_MT_WRITEBACK	(0x03 << 2)	/* 0011 */
+#define L_PTE_MT_MINICACHE	(0x06 << 2)	/* 0110 (sa1100, xscale) */
+#define L_PTE_MT_WRITEALLOC	(0x07 << 2)	/* 0111 */
+#define L_PTE_MT_DEV_SHARED	(0x04 << 2)	/* 0100 (pre-v6) */
+#define L_PTE_MT_DEV_SHARED2	(0x05 << 2)	/* 0101 (v6) */
+#define L_PTE_MT_DEV_NONSHARED	(0x0c << 2)	/* 1100 */
+#define L_PTE_MT_DEV_IXP2000	(0x0d << 2)	/* 1101 */
+#define L_PTE_MT_DEV_WC		(0x09 << 2)	/* 1001 (pre-v6, !xsc3) */
+#define L_PTE_MT_DEV_WC2	(0x08 << 2)	/* 1000 (xsc3, v6) */
+#define L_PTE_MT_DEV_CACHED	(0x0b << 2)	/* 1011 */
+#define L_PTE_MT_MASK		(0x0f << 2)
+
 #ifndef __ASSEMBLY__
 
 /*
@@ -180,7 +201,7 @@
  * as well as any architecture dependent bits like global/ASID and SMP
  * shared mapping bits.
  */
-#define _L_PTE_DEFAULT	L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_CACHEABLE | L_PTE_BUFFERABLE
+#define _L_PTE_DEFAULT	L_PTE_PRESENT | L_PTE_YOUNG
 #define _L_PTE_READ	L_PTE_USER | L_PTE_EXEC
 
 extern pgprot_t		pgprot_user;
@@ -286,8 +307,10 @@
 /*
  * Mark the prot value as uncacheable and unbufferable.
  */
-#define pgprot_noncached(prot)	__pgprot(pgprot_val(prot) & ~(L_PTE_CACHEABLE | L_PTE_BUFFERABLE))
-#define pgprot_writecombine(prot) __pgprot(pgprot_val(prot) & ~L_PTE_CACHEABLE)
+#define pgprot_noncached(prot) \
+	__pgprot((pgprot_val(prot) & ~L_PTE_MT_MASK) | L_PTE_MT_UNCACHED)
+#define pgprot_writecombine(prot) \
+	__pgprot((pgprot_val(prot) & ~L_PTE_MT_MASK) | L_PTE_MT_BUFFERABLE)
 
 #define pmd_none(pmd)		(!pmd_val(pmd))
 #define pmd_present(pmd)	(pmd_val(pmd))
diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c
index ded0e96..8d33e25 100644
--- a/arch/arm/mm/copypage-v4mc.c
+++ b/arch/arm/mm/copypage-v4mc.c
@@ -28,7 +28,7 @@
  * specific hacks for copying pages efficiently.
  */
 #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
-				  L_PTE_CACHEABLE)
+				  L_PTE_MT_MINICACHE)
 
 static DEFINE_SPINLOCK(minicache_lock);
 
diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c
index 2e455f8..bad4933 100644
--- a/arch/arm/mm/copypage-xscale.c
+++ b/arch/arm/mm/copypage-xscale.c
@@ -30,7 +30,7 @@
 #define COPYPAGE_MINICACHE	0xffff8000
 
 #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
-				  L_PTE_CACHEABLE)
+				  L_PTE_MT_MINICACHE)
 
 static DEFINE_SPINLOCK(minicache_lock);
 
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index a8ec97b..6f92904 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -21,7 +21,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
-static unsigned long shared_pte_mask = L_PTE_CACHEABLE;
+static unsigned long shared_pte_mask = L_PTE_MT_BUFFERABLE;
 
 /*
  * We take the easy way out of this problem - we make the
@@ -63,9 +63,10 @@
 	 * If this page isn't present, or is already setup to
 	 * fault (ie, is old), we can safely ignore any issues.
 	 */
-	if (ret && pte_val(entry) & shared_pte_mask) {
+	if (ret && (pte_val(entry) & L_PTE_MT_MASK) != shared_pte_mask) {
 		flush_cache_page(vma, address, pte_pfn(entry));
-		pte_val(entry) &= ~shared_pte_mask;
+		pte_val(entry) &= ~L_PTE_MT_MASK;
+		pte_val(entry) |= shared_pte_mask;
 		set_pte_at(vma->vm_mm, address, pte, entry);
 		flush_tlb_page(vma, address);
 	}
@@ -197,7 +198,7 @@
 		unsigned long *p1, *p2;
 		pgprot_t prot = __pgprot(L_PTE_PRESENT|L_PTE_YOUNG|
 					 L_PTE_DIRTY|L_PTE_WRITE|
-					 L_PTE_BUFFERABLE);
+					 L_PTE_MT_BUFFERABLE);
 
 		p1 = vmap(&page, 1, VM_IOREMAP, prot);
 		p2 = vmap(&page, 1, VM_IOREMAP, prot);
@@ -218,7 +219,7 @@
 
 	if (v) {
 		printk("failed, %s\n", reason);
-		shared_pte_mask |= L_PTE_BUFFERABLE;
+		shared_pte_mask = L_PTE_MT_UNCACHED;
 	} else {
 		printk("ok\n");
 	}
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index a713e40..cfc0add 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -68,27 +68,27 @@
 		.policy		= "uncached",
 		.cr_mask	= CR_W|CR_C,
 		.pmd		= PMD_SECT_UNCACHED,
-		.pte		= 0,
+		.pte		= L_PTE_MT_UNCACHED,
 	}, {
 		.policy		= "buffered",
 		.cr_mask	= CR_C,
 		.pmd		= PMD_SECT_BUFFERED,
-		.pte		= PTE_BUFFERABLE,
+		.pte		= L_PTE_MT_BUFFERABLE,
 	}, {
 		.policy		= "writethrough",
 		.cr_mask	= 0,
 		.pmd		= PMD_SECT_WT,
-		.pte		= PTE_CACHEABLE,
+		.pte		= L_PTE_MT_WRITETHROUGH,
 	}, {
 		.policy		= "writeback",
 		.cr_mask	= 0,
 		.pmd		= PMD_SECT_WB,
-		.pte		= PTE_BUFFERABLE|PTE_CACHEABLE,
+		.pte		= L_PTE_MT_WRITEBACK,
 	}, {
 		.policy		= "writealloc",
 		.cr_mask	= 0,
 		.pmd		= PMD_SECT_WBWA,
-		.pte		= PTE_BUFFERABLE|PTE_CACHEABLE,
+		.pte		= L_PTE_MT_WRITEALLOC,
 	}
 };
 
@@ -186,35 +186,36 @@
 
 static struct mem_type mem_types[] = {
 	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
-		.prot_pte	= PROT_PTE_DEVICE,
+		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
+				  L_PTE_SHARED,
 		.prot_l1	= PMD_TYPE_TABLE,
 		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_UNCACHED,
 		.domain		= DOMAIN_IO,
 	},
 	[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
-		.prot_pte	= PROT_PTE_DEVICE,
+		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
 		.prot_pte_ext	= PTE_EXT_TEX(2),
 		.prot_l1	= PMD_TYPE_TABLE,
 		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_TEX(2),
 		.domain		= DOMAIN_IO,
 	},
 	[MT_DEVICE_CACHED] = {	  /* ioremap_cached */
-		.prot_pte	= PROT_PTE_DEVICE | L_PTE_CACHEABLE | L_PTE_BUFFERABLE,
+		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
 		.prot_l1	= PMD_TYPE_TABLE,
 		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_WB,
 		.domain		= DOMAIN_IO,
 	},	
 	[MT_DEVICE_IXP2000] = {	  /* IXP2400 requires XCB=101 for on-chip I/O */
-		.prot_pte	= PROT_PTE_DEVICE,
+		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_IXP2000,
 		.prot_l1	= PMD_TYPE_TABLE,
 		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_BUFFERABLE |
 				  PMD_SECT_TEX(1),
 		.domain		= DOMAIN_IO,
 	},
 	[MT_DEVICE_WC] = {	/* ioremap_wc */
-		.prot_pte	= PROT_PTE_DEVICE,
+		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
 		.prot_l1	= PMD_TYPE_TABLE,
-		.prot_sect	= PROT_SECT_DEVICE,
+		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_BUFFERABLE,
 		.domain		= DOMAIN_IO,
 	},
 	[MT_CACHECLEAN] = {
@@ -259,7 +260,7 @@
 {
 	struct cachepolicy *cp;
 	unsigned int cr = get_cr();
-	unsigned int user_pgprot, kern_pgprot;
+	unsigned int user_pgprot, kern_pgprot, vecs_pgprot;
 	int cpu_arch = cpu_architecture();
 	int i;
 
@@ -277,6 +278,9 @@
 			cachepolicy = CPOLICY_WRITEBACK;
 		ecc_mask = 0;
 	}
+#ifdef CONFIG_SMP
+	cachepolicy = CPOLICY_WRITEALLOC;
+#endif
 
 	/*
 	 * On non-Xscale3 ARMv5-and-older systems, use CB=01
@@ -286,10 +290,9 @@
 	 */
 	if (cpu_is_xsc3() || cpu_arch >= CPU_ARCH_ARMv6) {
 		mem_types[MT_DEVICE_WC].prot_pte_ext |= PTE_EXT_TEX(1);
+		mem_types[MT_DEVICE_WC].prot_pte &= ~L_PTE_BUFFERABLE;
 		mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
-	} else {
-		mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_BUFFERABLE;
-		mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
+		mem_types[MT_DEVICE_WC].prot_sect &= ~PMD_SECT_BUFFERABLE;
 	}
 
 	/*
@@ -312,7 +315,15 @@
 	}
 
 	cp = &cache_policies[cachepolicy];
-	kern_pgprot = user_pgprot = cp->pte;
+	vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
+
+#ifndef CONFIG_SMP
+	/*
+	 * Only use write-through for non-SMP systems
+	 */
+	if (cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH)
+		vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte;
+#endif
 
 	/*
 	 * Enable CPU-specific coherency if supported.
@@ -349,30 +360,21 @@
 		 */
 		user_pgprot |= L_PTE_SHARED;
 		kern_pgprot |= L_PTE_SHARED;
+		vecs_pgprot |= L_PTE_SHARED;
 		mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
 #endif
 	}
 
 	for (i = 0; i < 16; i++) {
 		unsigned long v = pgprot_val(protection_map[i]);
-		v = (v & ~(L_PTE_BUFFERABLE|L_PTE_CACHEABLE)) | user_pgprot;
-		protection_map[i] = __pgprot(v);
+		protection_map[i] = __pgprot(v | user_pgprot);
 	}
 
-	mem_types[MT_LOW_VECTORS].prot_pte |= kern_pgprot;
-	mem_types[MT_HIGH_VECTORS].prot_pte |= kern_pgprot;
+	mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
+	mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;
 
-	if (cpu_arch >= CPU_ARCH_ARMv5) {
-#ifndef CONFIG_SMP
-		/*
-		 * Only use write-through for non-SMP systems
-		 */
-		mem_types[MT_LOW_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE;
-		mem_types[MT_HIGH_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE;
-#endif
-	} else {
+	if (cpu_arch < CPU_ARCH_ARMv5)
 		mem_types[MT_MINICLEAN].prot_sect &= ~PMD_SECT_TEX(1);
-	}
 
 	pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
 	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |