Check whether the TLB operations need broadcasting on SMP systems

ARMv7 SMP hardware can handle the TLB maintenance operations
broadcasting in hardware so that the software can avoid the costly IPIs.
This patch adds the necessary checks (the MMFR3 CPUID register) to avoid
the broadcasting if already supported by the hardware.

(this patch is based on the work done by Tony Thompson @ ARM)

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index 7b9d27e..b3e656c 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -8,6 +8,21 @@
 #define CPUID_TCM	2
 #define CPUID_TLBTYPE	3
 
+#define CPUID_EXT_PFR0	"c1, 0"
+#define CPUID_EXT_PFR1	"c1, 1"
+#define CPUID_EXT_DFR0	"c1, 2"
+#define CPUID_EXT_AFR0	"c1, 3"
+#define CPUID_EXT_MMFR0	"c1, 4"
+#define CPUID_EXT_MMFR1	"c1, 5"
+#define CPUID_EXT_MMFR2	"c1, 6"
+#define CPUID_EXT_MMFR3	"c1, 7"
+#define CPUID_EXT_ISAR0	"c2, 0"
+#define CPUID_EXT_ISAR1	"c2, 1"
+#define CPUID_EXT_ISAR2	"c2, 2"
+#define CPUID_EXT_ISAR3	"c2, 3"
+#define CPUID_EXT_ISAR4	"c2, 4"
+#define CPUID_EXT_ISAR5	"c2, 5"
+
 #ifdef CONFIG_CPU_CP15
 #define read_cpuid(reg)							\
 	({								\
@@ -18,9 +33,19 @@
 		    : "cc");						\
 		__val;							\
 	})
+#define read_cpuid_ext(ext_reg)						\
+	({								\
+		unsigned int __val;					\
+		asm("mrc	p15, 0, %0, c0, " ext_reg		\
+		    : "=r" (__val)					\
+		    :							\
+		    : "cc");						\
+		__val;							\
+	})
 #else
 extern unsigned int processor_id;
 #define read_cpuid(reg) (processor_id)
+#define read_cpuid_ext(reg) 0
 #endif
 
 /*
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index a622180..c964f3f 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -40,6 +40,12 @@
 #define TLB_V6_I_ASID	(1 << 18)
 
 #define TLB_BTB		(1 << 28)
+
+/* Unified Inner Shareable TLB operations (ARMv7 MP extensions) */
+#define TLB_V7_UIS_PAGE	(1 << 19)
+#define TLB_V7_UIS_FULL (1 << 20)
+#define TLB_V7_UIS_ASID (1 << 21)
+
 #define TLB_L2CLEAN_FR	(1 << 29)		/* Feroceon */
 #define TLB_DCLEAN	(1 << 30)
 #define TLB_WB		(1 << 31)
@@ -176,9 +182,17 @@
 # define v6wbi_always_flags	(-1UL)
 #endif
 
+#ifdef CONFIG_SMP
+#define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \
+			 TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID)
+#else
+#define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \
+			 TLB_V6_U_FULL | TLB_V6_U_PAGE | TLB_V6_U_ASID)
+#endif
+
 #ifdef CONFIG_CPU_TLB_V7
-# define v7wbi_possible_flags	v6wbi_tlb_flags
-# define v7wbi_always_flags	v6wbi_tlb_flags
+# define v7wbi_possible_flags	v7wbi_tlb_flags
+# define v7wbi_always_flags	v7wbi_tlb_flags
 # ifdef _TLB
 #  define MULTI_TLB 1
 # else
@@ -316,6 +330,8 @@
 		asm("mcr p15, 0, %0, c8, c6, 0" : : "r" (zero) : "cc");
 	if (tlb_flag(TLB_V4_I_FULL | TLB_V6_I_FULL))
 		asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc");
+	if (tlb_flag(TLB_V7_UIS_FULL))
+		asm("mcr p15, 0, %0, c8, c3, 0" : : "r" (zero) : "cc");
 
 	if (tlb_flag(TLB_BTB)) {
 		/* flush the branch target cache */
@@ -351,6 +367,8 @@
 		asm("mcr p15, 0, %0, c8, c6, 2" : : "r" (asid) : "cc");
 	if (tlb_flag(TLB_V6_I_ASID))
 		asm("mcr p15, 0, %0, c8, c5, 2" : : "r" (asid) : "cc");
+	if (tlb_flag(TLB_V7_UIS_ASID))
+		asm("mcr p15, 0, %0, c8, c3, 2" : : "r" (asid) : "cc");
 
 	if (tlb_flag(TLB_BTB)) {
 		/* flush the branch target cache */
@@ -389,6 +407,8 @@
 		asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (uaddr) : "cc");
 	if (tlb_flag(TLB_V6_I_PAGE))
 		asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (uaddr) : "cc");
+	if (tlb_flag(TLB_V7_UIS_PAGE))
+		asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (uaddr) : "cc");
 
 	if (tlb_flag(TLB_BTB)) {
 		/* flush the branch target cache */
@@ -424,6 +444,8 @@
 		asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (kaddr) : "cc");
 	if (tlb_flag(TLB_V6_I_PAGE))
 		asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (kaddr) : "cc");
+	if (tlb_flag(TLB_V7_UIS_PAGE))
+		asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (kaddr) : "cc");
 
 	if (tlb_flag(TLB_BTB)) {
 		/* flush the branch target cache */