[ARM] Feroceon: L2 cache support

This patch adds support for the unified Feroceon L2 cache controller
as found in e.g. the Marvell Kirkwood and Marvell Discovery Duo
families of ARM SoCs.

Note that:

- Page table walks are outer uncacheable on Kirkwood and Discovery
  Duo, since the ARMv5 spec provides no way to indicate outer
  cacheability of page table walks (specifying it in TTBR[4:3] is
  an ARMv6+ feature).

  This requires adding L2 cache clean instructions to
  proc-feroceon.S (dcache_clean_area(), set_pte()) as well as to
  tlbflush.h ({flush,clean}_pmd_entry()).  The latter case is handled
  by defining a new TLB type (TLB_FEROCEON) which is almost identical
  to the v4wbi one but provides a TLB_L2CLEAN_FR flag.

- The Feroceon L2 cache controller supports L2 range (i.e. 'clean L2
  range by MVA' and 'invalidate L2 range by MVA') operations, and this
  patch uses those range operations for all Linux outer cache
  operations, as they are faster than the regular per-line operations.

  L2 range operations are not interruptible on this hardware, which
  avoids potential livelock issues, but can be bad for interrupt
  latency, so there is a compile-time tunable (MAX_RANGE_SIZE) which
  allows you to select the maximum range size to operate on at once.
  (Valid range is between one cache line and one 4KiB page, and must
  be a multiple of the line size.)

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
diff --git a/include/asm-arm/tlbflush.h b/include/asm-arm/tlbflush.h
index 8c6bc1b..909656c 100644
--- a/include/asm-arm/tlbflush.h
+++ b/include/asm-arm/tlbflush.h
@@ -39,6 +39,7 @@
 #define TLB_V6_D_ASID	(1 << 17)
 #define TLB_V6_I_ASID	(1 << 18)
 
+#define TLB_L2CLEAN_FR	(1 << 29)		/* Feroceon */
 #define TLB_DCLEAN	(1 << 30)
 #define TLB_WB		(1 << 31)
 
@@ -51,6 +52,7 @@
  *	  v4    - ARMv4 without write buffer
  *	  v4wb  - ARMv4 with write buffer without I TLB flush entry instruction
  *	  v4wbi - ARMv4 with write buffer with I TLB flush entry instruction
+ *	  fr    - Feroceon (v4wbi with non-outer-cacheable page table walks)
  *	  v6wbi - ARMv6 with write buffer with I TLB flush entry instruction
  */
 #undef _TLB
@@ -103,6 +105,23 @@
 # define v4wbi_always_flags	(-1UL)
 #endif
 
+#define fr_tlb_flags	(TLB_WB | TLB_DCLEAN | TLB_L2CLEAN_FR | \
+			 TLB_V4_I_FULL | TLB_V4_D_FULL | \
+			 TLB_V4_I_PAGE | TLB_V4_D_PAGE)
+
+#ifdef CONFIG_CPU_TLB_FEROCEON
+# define fr_possible_flags	fr_tlb_flags
+# define fr_always_flags	fr_tlb_flags
+# ifdef _TLB
+#  define MULTI_TLB 1
+# else
+#  define _TLB v4wbi
+# endif
+#else
+# define fr_possible_flags	0
+# define fr_always_flags	(-1UL)
+#endif
+
 #define v4wb_tlb_flags	(TLB_WB | TLB_DCLEAN | \
 			 TLB_V4_I_FULL | TLB_V4_D_FULL | \
 			 TLB_V4_D_PAGE)
@@ -245,12 +264,14 @@
 #define possible_tlb_flags	(v3_possible_flags | \
 				 v4_possible_flags | \
 				 v4wbi_possible_flags | \
+				 fr_possible_flags | \
 				 v4wb_possible_flags | \
 				 v6wbi_possible_flags)
 
 #define always_tlb_flags	(v3_always_flags & \
 				 v4_always_flags & \
 				 v4wbi_always_flags & \
+				 fr_always_flags & \
 				 v4wb_always_flags & \
 				 v6wbi_always_flags)
 
@@ -417,6 +438,11 @@
 	if (tlb_flag(TLB_DCLEAN))
 		asm("mcr	p15, 0, %0, c7, c10, 1	@ flush_pmd"
 			: : "r" (pmd) : "cc");
+
+	if (tlb_flag(TLB_L2CLEAN_FR))
+		asm("mcr	p15, 1, %0, c15, c9, 1  @ L2 flush_pmd"
+			: : "r" (pmd) : "cc");
+
 	if (tlb_flag(TLB_WB))
 		dsb();
 }
@@ -428,6 +454,10 @@
 	if (tlb_flag(TLB_DCLEAN))
 		asm("mcr	p15, 0, %0, c7, c10, 1	@ flush_pmd"
 			: : "r" (pmd) : "cc");
+
+	if (tlb_flag(TLB_L2CLEAN_FR))
+		asm("mcr	p15, 1, %0, c15, c9, 1  @ L2 flush_pmd"
+			: : "r" (pmd) : "cc");
 }
 
 #undef tlb_flag