Merge branch 'devel-stable' into for-next
diff --git a/Documentation/arm/memory.txt b/Documentation/arm/memory.txt
index 4bfb9ff..38dc06d 100644
--- a/Documentation/arm/memory.txt
+++ b/Documentation/arm/memory.txt
@@ -41,16 +41,9 @@
 fffe0000	fffe7fff	ITCM mapping area for platforms with
 				ITCM mounted inside the CPU.
 
-fff00000	fffdffff	Fixmap mapping region.  Addresses provided
+ffc00000	ffdfffff	Fixmap mapping region.  Addresses provided
 				by fix_to_virt() will be located here.
 
-ffc00000	ffefffff	DMA memory mapping region.  Memory returned
-				by the dma_alloc_xxx functions will be
-				dynamically mapped here.
-
-ff000000	ffbfffff	Reserved for future expansion of DMA
-				mapping region.
-
 fee00000	feffffff	Mapping of PCI I/O space. This is a static
 				mapping within the vmalloc space.
 
diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
index fe5cef8..75ef91d 100644
--- a/Documentation/devicetree/bindings/arm/pmu.txt
+++ b/Documentation/devicetree/bindings/arm/pmu.txt
@@ -8,6 +8,7 @@
 
 - compatible : should be one of
 	"arm,armv8-pmuv3"
+	"arm,cortex-a17-pmu"
 	"arm,cortex-a15-pmu"
 	"arm,cortex-a12-pmu"
 	"arm,cortex-a9-pmu"
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index db3c541..8615dfa 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -165,12 +165,9 @@
 	bool
 	default y
 
-config RWSEM_GENERIC_SPINLOCK
-	bool
-	default y
-
 config RWSEM_XCHGADD_ALGORITHM
 	bool
+	default y
 
 config ARCH_HAS_ILOG2_U32
 	bool
@@ -1105,11 +1102,6 @@
 
 source arch/arm/mm/Kconfig
 
-config ARM_NR_BANKS
-	int
-	default 16 if ARCH_EP93XX
-	default 8
-
 config IWMMXT
 	bool "Enable iWMMXt support"
 	depends on CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_PJ4 || CPU_PJ4B
@@ -1230,19 +1222,6 @@
 	  register of the Cortex-A9 which reduces the linefill issuing
 	  capabilities of the processor.
 
-config PL310_ERRATA_588369
-	bool "PL310 errata: Clean & Invalidate maintenance operations do not invalidate clean lines"
-	depends on CACHE_L2X0
-	help
-	   The PL310 L2 cache controller implements three types of Clean &
-	   Invalidate maintenance operations: by Physical Address
-	   (offset 0x7F0), by Index/Way (0x7F8) and by Way (0x7FC).
-	   They are architecturally defined to behave as the execution of a
-	   clean operation followed immediately by an invalidate operation,
-	   both performing to the same memory location. This functionality
-	   is not correctly implemented in PL310 as clean lines are not
-	   invalidated as a result of these operations.
-
 config ARM_ERRATA_643719
 	bool "ARM errata: LoUIS bit field in CLIDR register is incorrect"
 	depends on CPU_V7 && SMP
@@ -1265,17 +1244,6 @@
 	  tables. The workaround changes the TLB flushing routines to invalidate
 	  entries regardless of the ASID.
 
-config PL310_ERRATA_727915
-	bool "PL310 errata: Background Clean & Invalidate by Way operation can cause data corruption"
-	depends on CACHE_L2X0
-	help
-	  PL310 implements the Clean & Invalidate by Way L2 cache maintenance
-	  operation (offset 0x7FC). This operation runs in background so that
-	  PL310 can handle normal accesses while it is in progress. Under very
-	  rare circumstances, due to this erratum, write data can be lost when
-	  PL310 treats a cacheable write transaction during a Clean &
-	  Invalidate by Way operation.
-
 config ARM_ERRATA_743622
 	bool "ARM errata: Faulty hazard checking in the Store Buffer may lead to data corruption"
 	depends on CPU_V7
@@ -1301,21 +1269,6 @@
 	  operation is received by a CPU before the ICIALLUIS has completed,
 	  potentially leading to corrupted entries in the cache or TLB.
 
-config PL310_ERRATA_753970
-	bool "PL310 errata: cache sync operation may be faulty"
-	depends on CACHE_PL310
-	help
-	  This option enables the workaround for the 753970 PL310 (r3p0) erratum.
-
-	  Under some condition the effect of cache sync operation on
-	  the store buffer still remains when the operation completes.
-	  This means that the store buffer is always asked to drain and
-	  this prevents it from merging any further writes. The workaround
-	  is to replace the normal offset of cache sync operation (0x730)
-	  by another offset targeting an unmapped PL310 register 0x740.
-	  This has the same effect as the cache sync operation: store buffer
-	  drain and waiting for all buffers empty.
-
 config ARM_ERRATA_754322
 	bool "ARM errata: possible faulty MMU translations following an ASID switch"
 	depends on CPU_V7
@@ -1364,18 +1317,6 @@
 	  relevant cache maintenance functions and sets a specific bit
 	  in the diagnostic control register of the SCU.
 
-config PL310_ERRATA_769419
-	bool "PL310 errata: no automatic Store Buffer drain"
-	depends on CACHE_L2X0
-	help
-	  On revisions of the PL310 prior to r3p2, the Store Buffer does
-	  not automatically drain. This can cause normal, non-cacheable
-	  writes to be retained when the memory system is idle, leading
-	  to suboptimal I/O performance for drivers using coherent DMA.
-	  This option adds a write barrier to the cpu_idle loop so that,
-	  on systems with an outer cache, the store buffer is drained
-	  explicitly.
-
 config ARM_ERRATA_775420
        bool "ARM errata: A data cache maintenance operation which aborts, might lead to deadlock"
        depends on CPU_V7
@@ -2295,6 +2236,11 @@
 config ARM_CPU_SUSPEND
 	def_bool PM_SLEEP
 
+config ARCH_HIBERNATION_POSSIBLE
+	bool
+	depends on MMU
+	default y if ARCH_SUSPEND_POSSIBLE
+
 endmenu
 
 source "net/Kconfig"
diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c
index d1153c8..9448aa0 100644
--- a/arch/arm/boot/compressed/atags_to_fdt.c
+++ b/arch/arm/boot/compressed/atags_to_fdt.c
@@ -7,6 +7,8 @@
 #define do_extend_cmdline 0
 #endif
 
+#define NR_BANKS 16
+
 static int node_offset(void *fdt, const char *node_path)
 {
 	int offset = fdt_path_offset(fdt, node_path);
diff --git a/arch/arm/boot/dts/marco.dtsi b/arch/arm/boot/dts/marco.dtsi
index 0c9647d..fb35422 100644
--- a/arch/arm/boot/dts/marco.dtsi
+++ b/arch/arm/boot/dts/marco.dtsi
@@ -36,7 +36,7 @@
 		ranges = <0x40000000 0x40000000 0xa0000000>;
 
 		l2-cache-controller@c0030000 {
-			compatible = "sirf,marco-pl310-cache", "arm,pl310-cache";
+			compatible = "arm,pl310-cache";
 			reg = <0xc0030000 0x1000>;
 			interrupts = <0 59 0>;
 			arm,tag-latency = <1 1 1>;
diff --git a/arch/arm/boot/dts/prima2.dtsi b/arch/arm/boot/dts/prima2.dtsi
index 1e82571..0d6588d 100644
--- a/arch/arm/boot/dts/prima2.dtsi
+++ b/arch/arm/boot/dts/prima2.dtsi
@@ -48,7 +48,7 @@
 		ranges = <0x40000000 0x40000000 0x80000000>;
 
 		l2-cache-controller@80040000 {
-			compatible = "arm,pl310-cache", "sirf,prima2-pl310-cache";
+			compatible = "arm,pl310-cache";
 			reg = <0x80040000 0x1000>;
 			interrupts = <59>;
 			arm,tag-latency = <1 1 1>;
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index f01c0ee..490f3dc 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -433,8 +433,12 @@
 {
 	int i;
 
-	for_each_cpu(i, &bL_switcher_removed_logical_cpus)
-		cpu_up(i);
+	for_each_cpu(i, &bL_switcher_removed_logical_cpus) {
+		struct device *cpu_dev = get_cpu_device(i);
+		int ret = device_online(cpu_dev);
+		if (ret)
+			dev_err(cpu_dev, "switcher: unable to restore CPU\n");
+	}
 }
 
 static int bL_switcher_halve_cpus(void)
@@ -521,7 +525,7 @@
 			continue;
 		}
 
-		ret = cpu_down(i);
+		ret = device_offline(get_cpu_device(i));
 		if (ret) {
 			bL_switcher_restore_cpus();
 			return ret;
diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index 86fd60f..f91136a 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -106,14 +106,14 @@
 	BUG();
 }
 
-int mcpm_cpu_power_down_finish(unsigned int cpu, unsigned int cluster)
+int mcpm_wait_for_cpu_powerdown(unsigned int cpu, unsigned int cluster)
 {
 	int ret;
 
-	if (WARN_ON_ONCE(!platform_ops || !platform_ops->power_down_finish))
+	if (WARN_ON_ONCE(!platform_ops || !platform_ops->wait_for_powerdown))
 		return -EUNATCH;
 
-	ret = platform_ops->power_down_finish(cpu, cluster);
+	ret = platform_ops->wait_for_powerdown(cpu, cluster);
 	if (ret)
 		pr_warn("%s: cpu %u, cluster %u failed to power down (%d)\n",
 			__func__, cpu, cluster, ret);
diff --git a/arch/arm/common/mcpm_platsmp.c b/arch/arm/common/mcpm_platsmp.c
index 177251a..92e54d7 100644
--- a/arch/arm/common/mcpm_platsmp.c
+++ b/arch/arm/common/mcpm_platsmp.c
@@ -62,7 +62,7 @@
 
 	cpu_to_pcpu(cpu, &pcpu, &pcluster);
 
-	return !mcpm_cpu_power_down_finish(pcpu, pcluster);
+	return !mcpm_wait_for_cpu_powerdown(pcpu, pcluster);
 }
 
 static int mcpm_cpu_disable(unsigned int cpu)
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 23e728e..f5a3576 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -21,6 +21,7 @@
 generic-y += poll.h
 generic-y += preempt.h
 generic-y += resource.h
+generic-y += rwsem.h
 generic-y += sections.h
 generic-y += segment.h
 generic-y += sembuf.h
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index b974184..57f0584 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -312,7 +312,7 @@
  * you cannot return to the original mode.
  */
 .macro safe_svcmode_maskall reg:req
-#if __LINUX_ARM_ARCH__ >= 6
+#if __LINUX_ARM_ARCH__ >= 6 && !defined(CONFIG_CPU_V7M)
 	mrs	\reg , cpsr
 	eor	\reg, \reg, #HYP_MODE
 	tst	\reg, #MODE_MASK
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 8b8b616..fd43f7f 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -212,7 +212,7 @@
 static inline void __flush_icache_all(void)
 {
 	__flush_icache_preferred();
-	dsb();
+	dsb(ishst);
 }
 
 /*
@@ -487,4 +487,6 @@
 int set_memory_x(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
 
+void flush_uprobe_xol_access(struct page *page, unsigned long uaddr,
+			     void *kaddr, unsigned long len);
 #endif
diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h
index 6493802..c3f1152 100644
--- a/arch/arm/include/asm/cp15.h
+++ b/arch/arm/include/asm/cp15.h
@@ -42,24 +42,23 @@
 #ifndef __ASSEMBLY__
 
 #if __LINUX_ARM_ARCH__ >= 4
-#define vectors_high()	(cr_alignment & CR_V)
+#define vectors_high()	(get_cr() & CR_V)
 #else
 #define vectors_high()	(0)
 #endif
 
 #ifdef CONFIG_CPU_CP15
 
-extern unsigned long cr_no_alignment;	/* defined in entry-armv.S */
 extern unsigned long cr_alignment;	/* defined in entry-armv.S */
 
-static inline unsigned int get_cr(void)
+static inline unsigned long get_cr(void)
 {
-	unsigned int val;
+	unsigned long val;
 	asm("mrc p15, 0, %0, c1, c0, 0	@ get CR" : "=r" (val) : : "cc");
 	return val;
 }
 
-static inline void set_cr(unsigned int val)
+static inline void set_cr(unsigned long val)
 {
 	asm volatile("mcr p15, 0, %0, c1, c0, 0	@ set CR"
 	  : : "r" (val) : "cc");
@@ -80,10 +79,6 @@
 	isb();
 }
 
-#ifndef CONFIG_SMP
-extern void adjust_cr(unsigned long mask, unsigned long set);
-#endif
-
 #define CPACC_FULL(n)		(3 << (n * 2))
 #define CPACC_SVC(n)		(1 << (n * 2))
 #define CPACC_DISABLE(n)	(0 << (n * 2))
@@ -106,13 +101,17 @@
 #else /* ifdef CONFIG_CPU_CP15 */
 
 /*
- * cr_alignment and cr_no_alignment are tightly coupled to cp15 (at least in the
- * minds of the developers). Yielding 0 for machines without a cp15 (and making
- * it read-only) is fine for most cases and saves quite some #ifdeffery.
+ * cr_alignment is tightly coupled to cp15 (at least in the minds of the
+ * developers). Yielding 0 for machines without a cp15 (and making it
+ * read-only) is fine for most cases and saves quite some #ifdeffery.
  */
-#define cr_no_alignment	UL(0)
 #define cr_alignment	UL(0)
 
+static inline unsigned long get_cr(void)
+{
+	return 0;
+}
+
 #endif /* ifdef CONFIG_CPU_CP15 / else */
 
 #endif /* ifndef __ASSEMBLY__ */
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index 4764344..8c2b732 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -72,6 +72,7 @@
 #define ARM_CPU_PART_CORTEX_A15		0xC0F0
 #define ARM_CPU_PART_CORTEX_A7		0xC070
 #define ARM_CPU_PART_CORTEX_A12		0xC0D0
+#define ARM_CPU_PART_CORTEX_A17		0xC0E0
 
 #define ARM_CPU_XSCALE_ARCH_MASK	0xe000
 #define ARM_CPU_XSCALE_ARCH_V1		0x2000
diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h
index bbae919..74124b0 100644
--- a/arch/arm/include/asm/fixmap.h
+++ b/arch/arm/include/asm/fixmap.h
@@ -1,24 +1,11 @@
 #ifndef _ASM_FIXMAP_H
 #define _ASM_FIXMAP_H
 
-/*
- * Nothing too fancy for now.
- *
- * On ARM we already have well known fixed virtual addresses imposed by
- * the architecture such as the vector page which is located at 0xffff0000,
- * therefore a second level page table is already allocated covering
- * 0xfff00000 upwards.
- *
- * The cache flushing code in proc-xscale.S uses the virtual area between
- * 0xfffe0000 and 0xfffeffff.
- */
-
-#define FIXADDR_START		0xfff00000UL
-#define FIXADDR_TOP		0xfffe0000UL
+#define FIXADDR_START		0xffc00000UL
+#define FIXADDR_TOP		0xffe00000UL
 #define FIXADDR_SIZE		(FIXADDR_TOP - FIXADDR_START)
 
-#define FIX_KMAP_BEGIN		0
-#define FIX_KMAP_END		(FIXADDR_SIZE >> PAGE_SHIFT)
+#define FIX_KMAP_NR_PTES	(FIXADDR_SIZE >> PAGE_SHIFT)
 
 #define __fix_to_virt(x)	(FIXADDR_START + ((x) << PAGE_SHIFT))
 #define __virt_to_fix(x)	(((x) - FIXADDR_START) >> PAGE_SHIFT)
@@ -27,7 +14,7 @@
 
 static inline unsigned long fix_to_virt(const unsigned int idx)
 {
-	if (idx >= FIX_KMAP_END)
+	if (idx >= FIX_KMAP_NR_PTES)
 		__this_fixmap_does_not_exist();
 	return __fix_to_virt(idx);
 }
diff --git a/arch/arm/include/asm/glue-df.h b/arch/arm/include/asm/glue-df.h
index 6b70f1b..04e18b6 100644
--- a/arch/arm/include/asm/glue-df.h
+++ b/arch/arm/include/asm/glue-df.h
@@ -31,14 +31,6 @@
 #undef CPU_DABORT_HANDLER
 #undef MULTI_DABORT
 
-#if defined(CONFIG_CPU_ARM710)
-# ifdef CPU_DABORT_HANDLER
-#  define MULTI_DABORT 1
-# else
-#  define CPU_DABORT_HANDLER cpu_arm7_data_abort
-# endif
-#endif
-
 #ifdef CONFIG_CPU_ABRT_EV4
 # ifdef CPU_DABORT_HANDLER
 #  define MULTI_DABORT 1
diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h
index 6795ff7..3a5ec1c 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -26,8 +26,8 @@
 #define L2X0_CACHE_TYPE			0x004
 #define L2X0_CTRL			0x100
 #define L2X0_AUX_CTRL			0x104
-#define L2X0_TAG_LATENCY_CTRL		0x108
-#define L2X0_DATA_LATENCY_CTRL		0x10C
+#define L310_TAG_LATENCY_CTRL		0x108
+#define L310_DATA_LATENCY_CTRL		0x10C
 #define L2X0_EVENT_CNT_CTRL		0x200
 #define L2X0_EVENT_CNT1_CFG		0x204
 #define L2X0_EVENT_CNT0_CFG		0x208
@@ -54,53 +54,93 @@
 #define L2X0_LOCKDOWN_WAY_D_BASE	0x900
 #define L2X0_LOCKDOWN_WAY_I_BASE	0x904
 #define L2X0_LOCKDOWN_STRIDE		0x08
-#define L2X0_ADDR_FILTER_START		0xC00
-#define L2X0_ADDR_FILTER_END		0xC04
+#define L310_ADDR_FILTER_START		0xC00
+#define L310_ADDR_FILTER_END		0xC04
 #define L2X0_TEST_OPERATION		0xF00
 #define L2X0_LINE_DATA			0xF10
 #define L2X0_LINE_TAG			0xF30
 #define L2X0_DEBUG_CTRL			0xF40
-#define L2X0_PREFETCH_CTRL		0xF60
-#define L2X0_POWER_CTRL			0xF80
-#define   L2X0_DYNAMIC_CLK_GATING_EN	(1 << 1)
-#define   L2X0_STNDBY_MODE_EN		(1 << 0)
+#define L310_PREFETCH_CTRL		0xF60
+#define L310_POWER_CTRL			0xF80
+#define   L310_DYNAMIC_CLK_GATING_EN	(1 << 1)
+#define   L310_STNDBY_MODE_EN		(1 << 0)
 
 /* Registers shifts and masks */
 #define L2X0_CACHE_ID_PART_MASK		(0xf << 6)
 #define L2X0_CACHE_ID_PART_L210		(1 << 6)
+#define L2X0_CACHE_ID_PART_L220		(2 << 6)
 #define L2X0_CACHE_ID_PART_L310		(3 << 6)
 #define L2X0_CACHE_ID_RTL_MASK          0x3f
-#define L2X0_CACHE_ID_RTL_R0P0          0x0
-#define L2X0_CACHE_ID_RTL_R1P0          0x2
-#define L2X0_CACHE_ID_RTL_R2P0          0x4
-#define L2X0_CACHE_ID_RTL_R3P0          0x5
-#define L2X0_CACHE_ID_RTL_R3P1          0x6
-#define L2X0_CACHE_ID_RTL_R3P2          0x8
+#define L210_CACHE_ID_RTL_R0P2_02	0x00
+#define L210_CACHE_ID_RTL_R0P1		0x01
+#define L210_CACHE_ID_RTL_R0P2_01	0x02
+#define L210_CACHE_ID_RTL_R0P3		0x03
+#define L210_CACHE_ID_RTL_R0P4		0x0b
+#define L210_CACHE_ID_RTL_R0P5		0x0f
+#define L220_CACHE_ID_RTL_R1P7_01REL0	0x06
+#define L310_CACHE_ID_RTL_R0P0		0x00
+#define L310_CACHE_ID_RTL_R1P0		0x02
+#define L310_CACHE_ID_RTL_R2P0		0x04
+#define L310_CACHE_ID_RTL_R3P0		0x05
+#define L310_CACHE_ID_RTL_R3P1		0x06
+#define L310_CACHE_ID_RTL_R3P1_50REL0	0x07
+#define L310_CACHE_ID_RTL_R3P2		0x08
+#define L310_CACHE_ID_RTL_R3P3		0x09
 
-#define L2X0_AUX_CTRL_MASK			0xc0000fff
+/* L2C auxiliary control register - bits common to L2C-210/220/310 */
+#define L2C_AUX_CTRL_WAY_SIZE_SHIFT		17
+#define L2C_AUX_CTRL_WAY_SIZE_MASK		(7 << 17)
+#define L2C_AUX_CTRL_WAY_SIZE(n)		((n) << 17)
+#define L2C_AUX_CTRL_EVTMON_ENABLE		BIT(20)
+#define L2C_AUX_CTRL_PARITY_ENABLE		BIT(21)
+#define L2C_AUX_CTRL_SHARED_OVERRIDE		BIT(22)
+/* L2C-210/220 common bits */
 #define L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT	0
-#define L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK	0x7
+#define L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK	(7 << 0)
 #define L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT	3
-#define L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK	(0x7 << 3)
+#define L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK	(7 << 3)
 #define L2X0_AUX_CTRL_TAG_LATENCY_SHIFT		6
-#define L2X0_AUX_CTRL_TAG_LATENCY_MASK		(0x7 << 6)
+#define L2X0_AUX_CTRL_TAG_LATENCY_MASK		(7 << 6)
 #define L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT	9
-#define L2X0_AUX_CTRL_DIRTY_LATENCY_MASK	(0x7 << 9)
-#define L2X0_AUX_CTRL_ASSOCIATIVITY_SHIFT	16
-#define L2X0_AUX_CTRL_WAY_SIZE_SHIFT		17
-#define L2X0_AUX_CTRL_WAY_SIZE_MASK		(0x7 << 17)
-#define L2X0_AUX_CTRL_SHARE_OVERRIDE_SHIFT	22
-#define L2X0_AUX_CTRL_NS_LOCKDOWN_SHIFT		26
-#define L2X0_AUX_CTRL_NS_INT_CTRL_SHIFT		27
-#define L2X0_AUX_CTRL_DATA_PREFETCH_SHIFT	28
-#define L2X0_AUX_CTRL_INSTR_PREFETCH_SHIFT	29
-#define L2X0_AUX_CTRL_EARLY_BRESP_SHIFT		30
+#define L2X0_AUX_CTRL_DIRTY_LATENCY_MASK	(7 << 9)
+#define L2X0_AUX_CTRL_ASSOC_SHIFT		13
+#define L2X0_AUX_CTRL_ASSOC_MASK		(15 << 13)
+/* L2C-210 specific bits */
+#define L210_AUX_CTRL_WRAP_DISABLE		BIT(12)
+#define L210_AUX_CTRL_WA_OVERRIDE		BIT(23)
+#define L210_AUX_CTRL_EXCLUSIVE_ABORT		BIT(24)
+/* L2C-220 specific bits */
+#define L220_AUX_CTRL_EXCLUSIVE_CACHE		BIT(12)
+#define L220_AUX_CTRL_FWA_SHIFT			23
+#define L220_AUX_CTRL_FWA_MASK			(3 << 23)
+#define L220_AUX_CTRL_NS_LOCKDOWN		BIT(26)
+#define L220_AUX_CTRL_NS_INT_CTRL		BIT(27)
+/* L2C-310 specific bits */
+#define L310_AUX_CTRL_FULL_LINE_ZERO		BIT(0)	/* R2P0+ */
+#define L310_AUX_CTRL_HIGHPRIO_SO_DEV		BIT(10)	/* R2P0+ */
+#define L310_AUX_CTRL_STORE_LIMITATION		BIT(11)	/* R2P0+ */
+#define L310_AUX_CTRL_EXCLUSIVE_CACHE		BIT(12)
+#define L310_AUX_CTRL_ASSOCIATIVITY_16		BIT(16)
+#define L310_AUX_CTRL_CACHE_REPLACE_RR		BIT(25)	/* R2P0+ */
+#define L310_AUX_CTRL_NS_LOCKDOWN		BIT(26)
+#define L310_AUX_CTRL_NS_INT_CTRL		BIT(27)
+#define L310_AUX_CTRL_DATA_PREFETCH		BIT(28)
+#define L310_AUX_CTRL_INSTR_PREFETCH		BIT(29)
+#define L310_AUX_CTRL_EARLY_BRESP		BIT(30)	/* R2P0+ */
 
-#define L2X0_LATENCY_CTRL_SETUP_SHIFT	0
-#define L2X0_LATENCY_CTRL_RD_SHIFT	4
-#define L2X0_LATENCY_CTRL_WR_SHIFT	8
+#define L310_LATENCY_CTRL_SETUP(n)		((n) << 0)
+#define L310_LATENCY_CTRL_RD(n)			((n) << 4)
+#define L310_LATENCY_CTRL_WR(n)			((n) << 8)
 
-#define L2X0_ADDR_FILTER_EN		1
+#define L310_ADDR_FILTER_EN		1
+
+#define L310_PREFETCH_CTRL_OFFSET_MASK		0x1f
+#define L310_PREFETCH_CTRL_DBL_LINEFILL_INCR	BIT(23)
+#define L310_PREFETCH_CTRL_PREFETCH_DROP	BIT(24)
+#define L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP	BIT(27)
+#define L310_PREFETCH_CTRL_DATA_PREFETCH	BIT(28)
+#define L310_PREFETCH_CTRL_INSTR_PREFETCH	BIT(29)
+#define L310_PREFETCH_CTRL_DBL_LINEFILL		BIT(30)
 
 #define L2X0_CTRL_EN			1
 
diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h
index 91b99ab..5355795 100644
--- a/arch/arm/include/asm/highmem.h
+++ b/arch/arm/include/asm/highmem.h
@@ -18,6 +18,7 @@
 	} while (0)
 
 extern pte_t *pkmap_page_table;
+extern pte_t *fixmap_page_table;
 
 extern void *kmap_high(struct page *page);
 extern void kunmap_high(struct page *page);
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 8aa4cca..3d23418 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -179,6 +179,12 @@
 /* PCI fixed i/o mapping */
 #define PCI_IO_VIRT_BASE	0xfee00000
 
+#if defined(CONFIG_PCI)
+void pci_ioremap_set_mem_type(int mem_type);
+#else
+static inline void pci_ioremap_set_mem_type(int mem_type) {}
+#endif
+
 extern int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr);
 
 /*
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 17a3fa2..060a75e 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -14,7 +14,6 @@
 #include <linux/reboot.h>
 
 struct tag;
-struct meminfo;
 struct pt_regs;
 struct smp_operations;
 #ifdef CONFIG_SMP
@@ -45,10 +44,12 @@
 	unsigned char		reserve_lp1 :1;	/* never has lp1	*/
 	unsigned char		reserve_lp2 :1;	/* never has lp2	*/
 	enum reboot_mode	reboot_mode;	/* default restart mode	*/
+	unsigned		l2c_aux_val;	/* L2 cache aux value	*/
+	unsigned		l2c_aux_mask;	/* L2 cache aux mask	*/
+	void			(*l2c_write_sec)(unsigned long, unsigned);
 	struct smp_operations	*smp;		/* SMP operations	*/
 	bool			(*smp_init)(void);
-	void			(*fixup)(struct tag *, char **,
-					 struct meminfo *);
+	void			(*fixup)(struct tag *, char **);
 	void			(*init_meminfo)(void);
 	void			(*reserve)(void);/* reserve mem blocks	*/
 	void			(*map_io)(void);/* IO mapping function	*/
diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h
index a5ff410..d9702eb 100644
--- a/arch/arm/include/asm/mcpm.h
+++ b/arch/arm/include/asm/mcpm.h
@@ -98,14 +98,14 @@
  * previously in which case the caller should take appropriate action.
  *
  * On success, the CPU is not guaranteed to be truly halted until
- * mcpm_cpu_power_down_finish() subsequently returns non-zero for the
+ * mcpm_wait_for_cpu_powerdown() subsequently returns non-zero for the
  * specified cpu.  Until then, other CPUs should make sure they do not
  * trash memory the target CPU might be executing/accessing.
  */
 void mcpm_cpu_power_down(void);
 
 /**
- * mcpm_cpu_power_down_finish - wait for a specified CPU to halt, and
+ * mcpm_wait_for_cpu_powerdown - wait for a specified CPU to halt, and
  *	make sure it is powered off
  *
  * @cpu: CPU number within given cluster
@@ -127,7 +127,7 @@
  *	- zero if the CPU is in a safely parked state
  *	- nonzero otherwise (e.g., timeout)
  */
-int mcpm_cpu_power_down_finish(unsigned int cpu, unsigned int cluster);
+int mcpm_wait_for_cpu_powerdown(unsigned int cpu, unsigned int cluster);
 
 /**
  * mcpm_cpu_suspend - bring the calling CPU in a suspended state
@@ -171,7 +171,7 @@
 struct mcpm_platform_ops {
 	int (*power_up)(unsigned int cpu, unsigned int cluster);
 	void (*power_down)(void);
-	int (*power_down_finish)(unsigned int cpu, unsigned int cluster);
+	int (*wait_for_powerdown)(unsigned int cpu, unsigned int cluster);
 	void (*suspend)(u64);
 	void (*powered_up)(void);
 };
diff --git a/arch/arm/include/asm/memblock.h b/arch/arm/include/asm/memblock.h
index c2f5102..bf47a6c 100644
--- a/arch/arm/include/asm/memblock.h
+++ b/arch/arm/include/asm/memblock.h
@@ -1,10 +1,9 @@
 #ifndef _ASM_ARM_MEMBLOCK_H
 #define _ASM_ARM_MEMBLOCK_H
 
-struct meminfo;
 struct machine_desc;
 
-void arm_memblock_init(struct meminfo *, const struct machine_desc *);
+void arm_memblock_init(const struct machine_desc *);
 phys_addr_t arm_memblock_steal(phys_addr_t size, phys_addr_t align);
 
 #endif
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 02fa255..2b75146 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -83,8 +83,6 @@
  */
 #define IOREMAP_MAX_ORDER	24
 
-#define CONSISTENT_END		(0xffe00000UL)
-
 #else /* CONFIG_MMU */
 
 /*
diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h
index f94784f..891a56b 100644
--- a/arch/arm/include/asm/outercache.h
+++ b/arch/arm/include/asm/outercache.h
@@ -28,53 +28,84 @@
 	void (*clean_range)(unsigned long, unsigned long);
 	void (*flush_range)(unsigned long, unsigned long);
 	void (*flush_all)(void);
-	void (*inv_all)(void);
 	void (*disable)(void);
 #ifdef CONFIG_OUTER_CACHE_SYNC
 	void (*sync)(void);
 #endif
-	void (*set_debug)(unsigned long);
 	void (*resume)(void);
+
+	/* This is an ARM L2C thing */
+	void (*write_sec)(unsigned long, unsigned);
 };
 
 extern struct outer_cache_fns outer_cache;
 
 #ifdef CONFIG_OUTER_CACHE
-
+/**
+ * outer_inv_range - invalidate range of outer cache lines
+ * @start: starting physical address, inclusive
+ * @end: end physical address, exclusive
+ */
 static inline void outer_inv_range(phys_addr_t start, phys_addr_t end)
 {
 	if (outer_cache.inv_range)
 		outer_cache.inv_range(start, end);
 }
+
+/**
+ * outer_clean_range - clean dirty outer cache lines
+ * @start: starting physical address, inclusive
+ * @end: end physical address, exclusive
+ */
 static inline void outer_clean_range(phys_addr_t start, phys_addr_t end)
 {
 	if (outer_cache.clean_range)
 		outer_cache.clean_range(start, end);
 }
+
+/**
+ * outer_flush_range - clean and invalidate outer cache lines
+ * @start: starting physical address, inclusive
+ * @end: end physical address, exclusive
+ */
 static inline void outer_flush_range(phys_addr_t start, phys_addr_t end)
 {
 	if (outer_cache.flush_range)
 		outer_cache.flush_range(start, end);
 }
 
+/**
+ * outer_flush_all - clean and invalidate all cache lines in the outer cache
+ *
+ * Note: depending on implementation, this may not be atomic - it must
+ * only be called with interrupts disabled and no other active outer
+ * cache masters.
+ *
+ * It is intended that this function is only used by implementations
+ * needing to override the outer_cache.disable() method due to security.
+ * (Some implementations perform this as a clean followed by an invalidate.)
+ */
 static inline void outer_flush_all(void)
 {
 	if (outer_cache.flush_all)
 		outer_cache.flush_all();
 }
 
-static inline void outer_inv_all(void)
-{
-	if (outer_cache.inv_all)
-		outer_cache.inv_all();
-}
+/**
+ * outer_disable - clean, invalidate and disable the outer cache
+ *
+ * Disable the outer cache, ensuring that any data contained in the outer
+ * cache is pushed out to lower levels of system memory.  The note and
+ * conditions above concerning outer_flush_all() applies here.
+ */
+extern void outer_disable(void);
 
-static inline void outer_disable(void)
-{
-	if (outer_cache.disable)
-		outer_cache.disable();
-}
-
+/**
+ * outer_resume - restore the cache configuration and re-enable outer cache
+ *
+ * Restore any configuration that the cache had when previously enabled,
+ * and re-enable the outer cache.
+ */
 static inline void outer_resume(void)
 {
 	if (outer_cache.resume)
@@ -90,13 +121,18 @@
 static inline void outer_flush_range(phys_addr_t start, phys_addr_t end)
 { }
 static inline void outer_flush_all(void) { }
-static inline void outer_inv_all(void) { }
 static inline void outer_disable(void) { }
 static inline void outer_resume(void) { }
 
 #endif
 
 #ifdef CONFIG_OUTER_CACHE_SYNC
+/**
+ * outer_sync - perform a sync point for outer cache
+ *
+ * Ensure that all outer cache operations are complete and any store
+ * buffers are drained.
+ */
 static inline void outer_sync(void)
 {
 	if (outer_cache.sync)
diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h
index 8d6a089..e0adb9f 100644
--- a/arch/arm/include/asm/setup.h
+++ b/arch/arm/include/asm/setup.h
@@ -21,34 +21,6 @@
 #define __tagtable(tag, fn) \
 static const struct tagtable __tagtable_##fn __tag = { tag, fn }
 
-/*
- * Memory map description
- */
-#define NR_BANKS	CONFIG_ARM_NR_BANKS
-
-struct membank {
-	phys_addr_t start;
-	phys_addr_t size;
-	unsigned int highmem;
-};
-
-struct meminfo {
-	int nr_banks;
-	struct membank bank[NR_BANKS];
-};
-
-extern struct meminfo meminfo;
-
-#define for_each_bank(iter,mi)				\
-	for (iter = 0; iter < (mi)->nr_banks; iter++)
-
-#define bank_pfn_start(bank)	__phys_to_pfn((bank)->start)
-#define bank_pfn_end(bank)	__phys_to_pfn((bank)->start + (bank)->size)
-#define bank_pfn_size(bank)	((bank)->size >> PAGE_SHIFT)
-#define bank_phys_start(bank)	(bank)->start
-#define bank_phys_end(bank)	((bank)->start + (bank)->size)
-#define bank_phys_size(bank)	(bank)->size
-
 extern int arm_add_memory(u64 start, u64 size);
 extern void early_print(const char *str, ...);
 extern void dump_machine_table(void);
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 12c3a5d..75d9579 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -171,8 +171,9 @@
 #define __put_user_check(x,p)							\
 	({								\
 		unsigned long __limit = current_thread_info()->addr_limit - 1; \
+		const typeof(*(p)) __user *__tmp_p = (p);		\
 		register const typeof(*(p)) __r2 asm("r2") = (x);	\
-		register const typeof(*(p)) __user *__p asm("r0") = (p);\
+		register const typeof(*(p)) __user *__p asm("r0") = __tmp_p; \
 		register unsigned long __l asm("r1") = __limit;		\
 		register int __e asm("r0");				\
 		switch (sizeof(*(__p))) {				\
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 040619c..38ddd9f 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -39,6 +39,7 @@
 obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
 obj-$(CONFIG_PCI)		+= bios32.o isa.o
 obj-$(CONFIG_ARM_CPU_SUSPEND)	+= sleep.o suspend.o
+obj-$(CONFIG_HIBERNATION)	+= hibernate.o
 obj-$(CONFIG_SMP)		+= smp.o
 ifdef CONFIG_MMU
 obj-$(CONFIG_SMP)		+= smp_tlb.o
diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c
index 8c14de8..7807ef5 100644
--- a/arch/arm/kernel/atags_parse.c
+++ b/arch/arm/kernel/atags_parse.c
@@ -22,6 +22,7 @@
 #include <linux/fs.h>
 #include <linux/root_dev.h>
 #include <linux/screen_info.h>
+#include <linux/memblock.h>
 
 #include <asm/setup.h>
 #include <asm/system_info.h>
@@ -222,10 +223,10 @@
 	}
 
 	if (mdesc->fixup)
-		mdesc->fixup(tags, &from, &meminfo);
+		mdesc->fixup(tags, &from);
 
 	if (tags->hdr.tag == ATAG_CORE) {
-		if (meminfo.nr_banks != 0)
+		if (memblock_phys_mem_size())
 			squash_mem_tags(tags);
 		save_atags(tags);
 		parse_tags(tags);
diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c
index c7419a5..679a83d 100644
--- a/arch/arm/kernel/devtree.c
+++ b/arch/arm/kernel/devtree.c
@@ -27,11 +27,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach-types.h>
 
-void __init early_init_dt_add_memory_arch(u64 base, u64 size)
-{
-	arm_add_memory(base, size);
-}
-
 void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
 {
 	return memblock_virt_alloc(size, align);
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 1879e8d..52a949a 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -344,7 +344,7 @@
 	@
 	@ Enable the alignment trap while in kernel mode
 	@
-	alignment_trap r0
+	alignment_trap r0, .LCcralign
 
 	@
 	@ Clear FP to mark the first stack frame
@@ -413,6 +413,11 @@
 	@
 	adr	r9, BSYM(ret_from_exception)
 
+	@ IRQs must be enabled before attempting to read the instruction from
+	@ user space since that could cause a page/translation fault if the
+	@ page table was modified by another CPU.
+	enable_irq
+
 	tst	r3, #PSR_T_BIT			@ Thumb mode?
 	bne	__und_usr_thumb
 	sub	r4, r2, #4			@ ARM instr at LR - 4
@@ -484,7 +489,8 @@
  */
 	.pushsection .fixup, "ax"
 	.align	2
-4:	mov	pc, r9
+4:	str     r4, [sp, #S_PC]			@ retry current instruction
+	mov	pc, r9
 	.popsection
 	.pushsection __ex_table,"a"
 	.long	1b, 4b
@@ -517,7 +523,7 @@
  *  r9  = normal "successful" return address
  *  r10 = this threads thread_info structure
  *  lr  = unrecognised instruction return address
- * IRQs disabled, FIQs enabled.
+ * IRQs enabled, FIQs enabled.
  */
 	@
 	@ Fall-through from Thumb-2 __und_usr
@@ -624,7 +630,6 @@
 #endif
 
 do_fpe:
-	enable_irq
 	ldr	r4, .LCfp
 	add	r10, r10, #TI_FPSTATE		@ r10 = workspace
 	ldr	pc, [r4]			@ Call FP module USR entry point
@@ -652,8 +657,7 @@
 	b	1f
 __und_usr_fault_16:
 	mov	r1, #2
-1:	enable_irq
-	mov	r0, sp
+1:	mov	r0, sp
 	adr	lr, BSYM(ret_from_exception)
 	b	__und_fault
 ENDPROC(__und_usr_fault_32)
@@ -1143,11 +1147,8 @@
 	.data
 
 	.globl	cr_alignment
-	.globl	cr_no_alignment
 cr_alignment:
 	.space	4
-cr_no_alignment:
-	.space	4
 
 #ifdef CONFIG_MULTI_IRQ_HANDLER
 	.globl	handle_arch_irq
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index a2dcafd..7139d4a 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -365,13 +365,7 @@
 	str	r0, [sp, #S_OLD_R0]		@ Save OLD_R0
 #endif
 	zero_fp
-
-#ifdef CONFIG_ALIGNMENT_TRAP
-	ldr	ip, __cr_alignment
-	ldr	ip, [ip]
-	mcr	p15, 0, ip, c1, c0		@ update control register
-#endif
-
+	alignment_trap ip, __cr_alignment
 	enable_irq
 	ct_user_exit
 	get_thread_info tsk
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 1420725..5d702f8 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -37,9 +37,9 @@
 #endif
 	.endm
 
-	.macro	alignment_trap, rtemp
+	.macro	alignment_trap, rtemp, label
 #ifdef CONFIG_ALIGNMENT_TRAP
-	ldr	\rtemp, .LCcralign
+	ldr	\rtemp, \label
 	ldr	\rtemp, [\rtemp]
 	mcr	p15, 0, \rtemp, c1, c0
 #endif
@@ -132,6 +132,10 @@
 	orrne	r5, V7M_xPSR_FRAMEPTRALIGN
 	biceq	r5, V7M_xPSR_FRAMEPTRALIGN
 
+	@ ensure bit 0 is cleared in the PC, otherwise behaviour is
+	@ unpredictable
+	bic	r4, #1
+
 	@ write basic exception frame
 	stmdb	r2!, {r1, r3-r5}
 	ldmia	sp, {r1, r3-r5}
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index c108ddc..af9a8a9 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -14,6 +14,7 @@
 
 #include <linux/ftrace.h>
 #include <linux/uaccess.h>
+#include <linux/module.h>
 
 #include <asm/cacheflush.h>
 #include <asm/opcodes.h>
@@ -63,6 +64,18 @@
 }
 #endif
 
+int ftrace_arch_code_modify_prepare(void)
+{
+	set_all_modules_text_rw();
+	return 0;
+}
+
+int ftrace_arch_code_modify_post_process(void)
+{
+	set_all_modules_text_ro();
+	return 0;
+}
+
 static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
 {
 	return arm_gen_branch_link(pc, addr);
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index c96ecac..572a383 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -99,8 +99,7 @@
 	str	r1, [r5]			@ Save machine type
 	str	r2, [r6]			@ Save atags pointer
 	cmp	r7, #0
-	bicne	r4, r0, #CR_A			@ Clear 'A' bit
-	stmneia	r7, {r0, r4}			@ Save control register values
+	strne	r0, [r7]			@ Save control register values
 	b	start_kernel
 ENDPROC(__mmap_switched)
 
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 591d6e4..2c35f0f 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -475,7 +475,7 @@
 
 
 #ifdef CONFIG_SMP_ON_UP
-	__INIT
+	__HEAD
 __fixup_smp:
 	and	r3, r9, #0x000f0000	@ architecture version
 	teq	r3, #0x000f0000		@ CPU ID supported?
diff --git a/arch/arm/kernel/hibernate.c b/arch/arm/kernel/hibernate.c
new file mode 100644
index 0000000..bb8b796
--- /dev/null
+++ b/arch/arm/kernel/hibernate.c
@@ -0,0 +1,107 @@
+/*
+ * Hibernation support specific for ARM
+ *
+ * Derived from work on ARM hibernation support by:
+ *
+ * Ubuntu project, hibernation support for mach-dove
+ * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu)
+ * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.)
+ *  https://lkml.org/lkml/2010/6/18/4
+ *  https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html
+ *  https://patchwork.kernel.org/patch/96442/
+ *
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/mm.h>
+#include <linux/suspend.h>
+#include <asm/system_misc.h>
+#include <asm/idmap.h>
+#include <asm/suspend.h>
+#include <asm/memory.h>
+
+extern const void __nosave_begin, __nosave_end;
+
+int pfn_is_nosave(unsigned long pfn)
+{
+	unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
+	unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);
+
+	return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
+}
+
+void notrace save_processor_state(void)
+{
+	WARN_ON(num_online_cpus() != 1);
+	local_fiq_disable();
+}
+
+void notrace restore_processor_state(void)
+{
+	local_fiq_enable();
+}
+
+/*
+ * Snapshot kernel memory and reset the system.
+ *
+ * swsusp_save() is executed in the suspend finisher so that the CPU
+ * context pointer and memory are part of the saved image, which is
+ * required by the resume kernel image to restart execution from
+ * swsusp_arch_suspend().
+ *
+ * soft_restart is not technically needed, but is used to get success
+ * returned from cpu_suspend.
+ *
+ * When soft reboot completes, the hibernation snapshot is written out.
+ */
+static int notrace arch_save_image(unsigned long unused)
+{
+	int ret;
+
+	ret = swsusp_save();
+	if (ret == 0)
+		soft_restart(virt_to_phys(cpu_resume));
+	return ret;
+}
+
+/*
+ * Save the current CPU state before suspend / poweroff.
+ */
+int notrace swsusp_arch_suspend(void)
+{
+	return cpu_suspend(0, arch_save_image);
+}
+
+/*
+ * Restore page contents for physical pages that were in use during loading
+ * hibernation image.  Switch to idmap_pgd so the physical page tables
+ * are overwritten with the same contents.
+ */
+static void notrace arch_restore_image(void *unused)
+{
+	struct pbe *pbe;
+
+	cpu_switch_mm(idmap_pgd, &init_mm);
+	for (pbe = restore_pblist; pbe; pbe = pbe->next)
+		copy_page(pbe->orig_address, pbe->address);
+
+	soft_restart(virt_to_phys(cpu_resume));
+}
+
+static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata;
+
+/*
+ * Resume from the hibernation image.
+ * Due to the kernel heap / data restore, stack contents change underneath
+ * and that would make function calls impossible; switch to a temporary
+ * stack within the nosave region to avoid that problem.
+ */
+int swsusp_arch_resume(void)
+{
+	extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
+	call_with_stack(arch_restore_image, 0,
+		resume_stack + ARRAY_SIZE(resume_stack));
+	return 0;
+}
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 9723d17..2c42576 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -37,6 +37,7 @@
 #include <linux/proc_fs.h>
 #include <linux/export.h>
 
+#include <asm/hardware/cache-l2x0.h>
 #include <asm/exception.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/irq.h>
@@ -115,10 +116,21 @@
 
 void __init init_IRQ(void)
 {
+	int ret;
+
 	if (IS_ENABLED(CONFIG_OF) && !machine_desc->init_irq)
 		irqchip_init();
 	else
 		machine_desc->init_irq();
+
+	if (IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_CACHE_L2X0) &&
+	    (machine_desc->l2c_aux_mask || machine_desc->l2c_aux_val)) {
+		outer_cache.write_sec = machine_desc->l2c_write_sec;
+		ret = l2x0_of_init(machine_desc->l2c_aux_val,
+				   machine_desc->l2c_aux_mask);
+		if (ret)
+			pr_err("L2C: failed to init: %d\n", ret);
+	}
 }
 
 #ifdef CONFIG_MULTI_IRQ_HANDLER
diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S
index 2452dd1..a5599cf 100644
--- a/arch/arm/kernel/iwmmxt.S
+++ b/arch/arm/kernel/iwmmxt.S
@@ -18,6 +18,7 @@
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
+#include <asm/assembler.h>
 
 #if defined(CONFIG_CPU_PJ4) || defined(CONFIG_CPU_PJ4B)
 #define PJ4(code...)		code
@@ -65,17 +66,18 @@
  * r9  = ret_from_exception
  * lr  = undefined instr exit
  *
- * called from prefetch exception handler with interrupts disabled
+ * called from prefetch exception handler with interrupts enabled
  */
 
 ENTRY(iwmmxt_task_enable)
+	inc_preempt_count r10, r3
 
 	XSC(mrc	p15, 0, r2, c15, c1, 0)
 	PJ4(mrc p15, 0, r2, c1, c0, 2)
 	@ CP0 and CP1 accessible?
 	XSC(tst	r2, #0x3)
 	PJ4(tst	r2, #0xf)
-	movne	pc, lr				@ if so no business here
+	bne	4f				@ if so no business here
 	@ enable access to CP0 and CP1
 	XSC(orr	r2, r2, #0x3)
 	XSC(mcr	p15, 0, r2, c15, c1, 0)
@@ -136,7 +138,7 @@
 	wstrd	wR15, [r1, #MMX_WR15]
 
 2:	teq	r0, #0				@ anything to load?
-	moveq	pc, lr
+	beq	3f
 
 concan_load:
 
@@ -169,8 +171,14 @@
 	@ clear CUP/MUP (only if r1 != 0)
 	teq	r1, #0
 	mov 	r2, #0
-	moveq	pc, lr
+	beq	3f
 	tmcr	wCon, r2
+
+3:
+#ifdef CONFIG_PREEMPT_COUNT
+	get_thread_info r10
+#endif
+4:	dec_preempt_count r10, r3
 	mov	pc, lr
 
 /*
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 51798d7..a71ae15 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -221,6 +221,7 @@
  * PMU platform driver and devicetree bindings.
  */
 static struct of_device_id cpu_pmu_of_device_ids[] = {
+	{.compatible = "arm,cortex-a17-pmu",	.data = armv7_a17_pmu_init},
 	{.compatible = "arm,cortex-a15-pmu",	.data = armv7_a15_pmu_init},
 	{.compatible = "arm,cortex-a12-pmu",	.data = armv7_a12_pmu_init},
 	{.compatible = "arm,cortex-a9-pmu",	.data = armv7_a9_pmu_init},
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index f4ef398..2037f72 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -1599,6 +1599,13 @@
 	return 0;
 }
 
+static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7_a12_pmu_init(cpu_pmu);
+	cpu_pmu->name = "ARMv7 Cortex-A17";
+	return 0;
+}
+
 /*
  * Krait Performance Monitor Region Event Selection Register (PMRESRn)
  *
@@ -2021,6 +2028,11 @@
 	return -ENODEV;
 }
 
+static inline int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+
 static inline int krait_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	return -ENODEV;
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 50e198c..8a16ee5 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -72,6 +72,7 @@
 __setup("fpe=", fpe_setup);
 #endif
 
+extern void init_default_cache_policy(unsigned long);
 extern void paging_init(const struct machine_desc *desc);
 extern void early_paging_init(const struct machine_desc *,
 			      struct proc_info_list *);
@@ -590,7 +591,7 @@
 
 	pr_info("CPU: %s [%08x] revision %d (ARMv%s), cr=%08lx\n",
 		cpu_name, read_cpuid_id(), read_cpuid_id() & 15,
-		proc_arch[cpu_architecture()], cr_alignment);
+		proc_arch[cpu_architecture()], get_cr());
 
 	snprintf(init_utsname()->machine, __NEW_UTS_LEN + 1, "%s%c",
 		 list->arch_name, ENDIANNESS);
@@ -603,7 +604,9 @@
 #ifndef CONFIG_ARM_THUMB
 	elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT);
 #endif
-
+#ifdef CONFIG_MMU
+	init_default_cache_policy(list->__cpu_mm_mmu_flags);
+#endif
 	erratum_a15_798181_init();
 
 	feat_v6_fixup();
@@ -628,15 +631,8 @@
 
 int __init arm_add_memory(u64 start, u64 size)
 {
-	struct membank *bank = &meminfo.bank[meminfo.nr_banks];
 	u64 aligned_start;
 
-	if (meminfo.nr_banks >= NR_BANKS) {
-		pr_crit("NR_BANKS too low, ignoring memory at 0x%08llx\n",
-			(long long)start);
-		return -EINVAL;
-	}
-
 	/*
 	 * Ensure that start/size are aligned to a page boundary.
 	 * Size is appropriately rounded down, start is rounded up.
@@ -677,17 +673,17 @@
 		aligned_start = PHYS_OFFSET;
 	}
 
-	bank->start = aligned_start;
-	bank->size = size & ~(phys_addr_t)(PAGE_SIZE - 1);
+	start = aligned_start;
+	size = size & ~(phys_addr_t)(PAGE_SIZE - 1);
 
 	/*
 	 * Check whether this memory region has non-zero size or
 	 * invalid node number.
 	 */
-	if (bank->size == 0)
+	if (size == 0)
 		return -EINVAL;
 
-	meminfo.nr_banks++;
+	memblock_add(start, size);
 	return 0;
 }
 
@@ -695,6 +691,7 @@
  * Pick out the memory size.  We look for mem=size@start,
  * where start and size are "size[KkMm]"
  */
+
 static int __init early_mem(char *p)
 {
 	static int usermem __initdata = 0;
@@ -709,7 +706,8 @@
 	 */
 	if (usermem == 0) {
 		usermem = 1;
-		meminfo.nr_banks = 0;
+		memblock_remove(memblock_start_of_DRAM(),
+			memblock_end_of_DRAM() - memblock_start_of_DRAM());
 	}
 
 	start = PHYS_OFFSET;
@@ -854,13 +852,6 @@
 static inline void reserve_crashkernel(void) {}
 #endif /* CONFIG_KEXEC */
 
-static int __init meminfo_cmp(const void *_a, const void *_b)
-{
-	const struct membank *a = _a, *b = _b;
-	long cmp = bank_pfn_start(a) - bank_pfn_start(b);
-	return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
-}
-
 void __init hyp_mode_check(void)
 {
 #ifdef CONFIG_ARM_VIRT_EXT
@@ -903,12 +894,10 @@
 
 	parse_early_param();
 
-	sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL);
-
 	early_paging_init(mdesc, lookup_processor_type(read_cpuid_id()));
 	setup_dma_zone(mdesc);
 	sanity_check_meminfo();
-	arm_memblock_init(&meminfo, mdesc);
+	arm_memblock_init(mdesc);
 
 	paging_init(mdesc);
 	request_standard_resources(mdesc);
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index b907d9b..1b880db 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -127,6 +127,10 @@
 	.align
 ENTRY(cpu_resume)
 ARM_BE8(setend be)			@ ensure we are in BE mode
+#ifdef CONFIG_ARM_VIRT_EXT
+	bl	__hyp_stub_install_secondary
+#endif
+	safe_svcmode_maskall r1
 	mov	r1, #0
 	ALT_SMP(mrc p15, 0, r0, c0, c0, 5)
 	ALT_UP_B(1f)
@@ -144,7 +148,6 @@
 	ldr	r0, [r0, #SLEEP_SAVE_SP_PHYS]
 	ldr	r0, [r0, r1, lsl #2]
 
-	setmode	PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1  @ set SVC, irqs off
 	@ load phys pgd, stack, resume fn
   ARM(	ldmia	r0!, {r1, sp, pc}	)
 THUMB(	ldmia	r0!, {r1, r2, r3}	)
diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c
index af4e8c8..f065eb0 100644
--- a/arch/arm/kernel/stacktrace.c
+++ b/arch/arm/kernel/stacktrace.c
@@ -3,6 +3,7 @@
 #include <linux/stacktrace.h>
 
 #include <asm/stacktrace.h>
+#include <asm/traps.h>
 
 #if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND)
 /*
@@ -61,6 +62,7 @@
 #ifdef CONFIG_STACKTRACE
 struct stack_trace_data {
 	struct stack_trace *trace;
+	unsigned long last_pc;
 	unsigned int no_sched_functions;
 	unsigned int skip;
 };
@@ -69,6 +71,7 @@
 {
 	struct stack_trace_data *data = d;
 	struct stack_trace *trace = data->trace;
+	struct pt_regs *regs;
 	unsigned long addr = frame->pc;
 
 	if (data->no_sched_functions && in_sched_functions(addr))
@@ -80,16 +83,39 @@
 
 	trace->entries[trace->nr_entries++] = addr;
 
+	if (trace->nr_entries >= trace->max_entries)
+		return 1;
+
+	/*
+	 * in_exception_text() is designed to test if the PC is one of
+	 * the functions which has an exception stack above it, but
+	 * unfortunately what is in frame->pc is the return LR value,
+	 * not the saved PC value.  So, we need to track the previous
+	 * frame PC value when doing this.
+	 */
+	addr = data->last_pc;
+	data->last_pc = frame->pc;
+	if (!in_exception_text(addr))
+		return 0;
+
+	regs = (struct pt_regs *)frame->sp;
+
+	trace->entries[trace->nr_entries++] = regs->ARM_pc;
+
 	return trace->nr_entries >= trace->max_entries;
 }
 
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+/* This must be noinline to so that our skip calculation works correctly */
+static noinline void __save_stack_trace(struct task_struct *tsk,
+	struct stack_trace *trace, unsigned int nosched)
 {
 	struct stack_trace_data data;
 	struct stackframe frame;
 
 	data.trace = trace;
+	data.last_pc = ULONG_MAX;
 	data.skip = trace->skip;
+	data.no_sched_functions = nosched;
 
 	if (tsk != current) {
 #ifdef CONFIG_SMP
@@ -102,7 +128,6 @@
 			trace->entries[trace->nr_entries++] = ULONG_MAX;
 		return;
 #else
-		data.no_sched_functions = 1;
 		frame.fp = thread_saved_fp(tsk);
 		frame.sp = thread_saved_sp(tsk);
 		frame.lr = 0;		/* recovered from the stack */
@@ -111,11 +136,12 @@
 	} else {
 		register unsigned long current_sp asm ("sp");
 
-		data.no_sched_functions = 0;
+		/* We don't want this function nor the caller */
+		data.skip += 2;
 		frame.fp = (unsigned long)__builtin_frame_address(0);
 		frame.sp = current_sp;
 		frame.lr = (unsigned long)__builtin_return_address(0);
-		frame.pc = (unsigned long)save_stack_trace_tsk;
+		frame.pc = (unsigned long)__save_stack_trace;
 	}
 
 	walk_stackframe(&frame, save_trace, &data);
@@ -123,9 +149,33 @@
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 
+void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+	struct stack_trace_data data;
+	struct stackframe frame;
+
+	data.trace = trace;
+	data.skip = trace->skip;
+	data.no_sched_functions = 0;
+
+	frame.fp = regs->ARM_fp;
+	frame.sp = regs->ARM_sp;
+	frame.lr = regs->ARM_lr;
+	frame.pc = regs->ARM_pc;
+
+	walk_stackframe(&frame, save_trace, &data);
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+	__save_stack_trace(tsk, trace, 1);
+}
+
 void save_stack_trace(struct stack_trace *trace)
 {
-	save_stack_trace_tsk(current, trace);
+	__save_stack_trace(current, trace, 0);
 }
 EXPORT_SYMBOL_GPL(save_stack_trace);
 #endif
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 0bc94b1..0fa8825 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -91,13 +91,13 @@
 {
 	const struct cpu_efficiency *cpu_eff;
 	struct device_node *cn = NULL;
-	unsigned long min_capacity = (unsigned long)(-1);
+	unsigned long min_capacity = ULONG_MAX;
 	unsigned long max_capacity = 0;
 	unsigned long capacity = 0;
-	int alloc_size, cpu = 0;
+	int cpu = 0;
 
-	alloc_size = nr_cpu_ids * sizeof(*__cpu_capacity);
-	__cpu_capacity = kzalloc(alloc_size, GFP_NOWAIT);
+	__cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity),
+				 GFP_NOWAIT);
 
 	for_each_possible_cpu(cpu) {
 		const u32 *rate;
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index 3c21769..cb791ac 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -285,7 +285,7 @@
 		if (unwind_pop_register(ctrl, &vsp, reg))
 				return -URC_FAILURE;
 
-	if (insn & 0x80)
+	if (insn & 0x8)
 		if (unwind_pop_register(ctrl, &vsp, 14))
 				return -URC_FAILURE;
 
diff --git a/arch/arm/kernel/uprobes.c b/arch/arm/kernel/uprobes.c
index f9bacee..56adf9c 100644
--- a/arch/arm/kernel/uprobes.c
+++ b/arch/arm/kernel/uprobes.c
@@ -113,6 +113,26 @@
 	return 0;
 }
 
+void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+			   void *src, unsigned long len)
+{
+	void *xol_page_kaddr = kmap_atomic(page);
+	void *dst = xol_page_kaddr + (vaddr & ~PAGE_MASK);
+
+	preempt_disable();
+
+	/* Initialize the slot */
+	memcpy(dst, src, len);
+
+	/* flush caches (dcache/icache) */
+	flush_uprobe_xol_access(page, vaddr, dst, len);
+
+	preempt_enable();
+
+	kunmap_atomic(xol_page_kaddr);
+}
+
+
 int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	struct uprobe_task *utask = current->utask;
diff --git a/arch/arm/mach-bcm/bcm_5301x.c b/arch/arm/mach-bcm/bcm_5301x.c
index edff6976..e9bcbdb 100644
--- a/arch/arm/mach-bcm/bcm_5301x.c
+++ b/arch/arm/mach-bcm/bcm_5301x.c
@@ -43,19 +43,14 @@
 			"imprecise external abort");
 }
 
-static void __init bcm5301x_dt_init(void)
-{
-	l2x0_of_init(0, ~0UL);
-	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
-
 static const char __initconst *bcm5301x_dt_compat[] = {
 	"brcm,bcm4708",
 	NULL,
 };
 
 DT_MACHINE_START(BCM5301X, "BCM5301X")
+	.l2c_aux_val	= 0,
+	.l2c_aux_mask	= ~0,
 	.init_early	= bcm5301x_init_early,
-	.init_machine	= bcm5301x_dt_init,
 	.dt_compat	= bcm5301x_dt_compat,
 MACHINE_END
diff --git a/arch/arm/mach-berlin/berlin.c b/arch/arm/mach-berlin/berlin.c
index 025bcb5..ac181c6 100644
--- a/arch/arm/mach-berlin/berlin.c
+++ b/arch/arm/mach-berlin/berlin.c
@@ -18,16 +18,6 @@
 #include <asm/hardware/cache-l2x0.h>
 #include <asm/mach/arch.h>
 
-static void __init berlin_init_machine(void)
-{
-	/*
-	 * with DT probing for L2CCs, berlin_init_machine can be removed.
-	 * Note: 88DE3005 (Armada 1500-mini) uses pl310 l2cc
-	 */
-	l2x0_of_init(0x70c00000, 0xfeffffff);
-	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
-
 static const char * const berlin_dt_compat[] = {
 	"marvell,berlin",
 	NULL,
@@ -35,5 +25,10 @@
 
 DT_MACHINE_START(BERLIN_DT, "Marvell Berlin")
 	.dt_compat	= berlin_dt_compat,
-	.init_machine	= berlin_init_machine,
+	/*
+	 * with DT probing for L2CCs, berlin_init_machine can be removed.
+	 * Note: 88DE3005 (Armada 1500-mini) uses pl310 l2cc
+	 */
+	.l2c_aux_val	= 0x30c00000,
+	.l2c_aux_mask	= 0xfeffffff,
 MACHINE_END
diff --git a/arch/arm/mach-clps711x/board-clep7312.c b/arch/arm/mach-clps711x/board-clep7312.c
index 221b9de..94a7add 100644
--- a/arch/arm/mach-clps711x/board-clep7312.c
+++ b/arch/arm/mach-clps711x/board-clep7312.c
@@ -18,6 +18,7 @@
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/string.h>
+#include <linux/memblock.h>
 
 #include <asm/setup.h>
 #include <asm/mach-types.h>
@@ -26,11 +27,9 @@
 #include "common.h"
 
 static void __init
-fixup_clep7312(struct tag *tags, char **cmdline, struct meminfo *mi)
+fixup_clep7312(struct tag *tags, char **cmdline)
 {
-	mi->nr_banks=1;
-	mi->bank[0].start = 0xc0000000;
-	mi->bank[0].size = 0x01000000;
+	memblock_add(0xc0000000, 0x01000000);
 }
 
 MACHINE_START(CLEP7212, "Cirrus Logic 7212/7312")
diff --git a/arch/arm/mach-clps711x/board-edb7211.c b/arch/arm/mach-clps711x/board-edb7211.c
index 0776098..f9828f8 100644
--- a/arch/arm/mach-clps711x/board-edb7211.c
+++ b/arch/arm/mach-clps711x/board-edb7211.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/backlight.h>
 #include <linux/platform_device.h>
+#include <linux/memblock.h>
 
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/partitions.h>
@@ -133,7 +134,7 @@
 }
 
 static void __init
-fixup_edb7211(struct tag *tags, char **cmdline, struct meminfo *mi)
+fixup_edb7211(struct tag *tags, char **cmdline)
 {
 	/*
 	 * Bank start addresses are not present in the information
@@ -143,11 +144,8 @@
 	 * Banks sizes _are_ present in the param block, but we're
 	 * not using that information yet.
 	 */
-	mi->bank[0].start = 0xc0000000;
-	mi->bank[0].size = SZ_8M;
-	mi->bank[1].start = 0xc1000000;
-	mi->bank[1].size = SZ_8M;
-	mi->nr_banks = 2;
+	memblock_add(0xc0000000, SZ_8M);
+	memblock_add(0xc1000000, SZ_8M);
 }
 
 static void __init edb7211_init(void)
diff --git a/arch/arm/mach-clps711x/board-p720t.c b/arch/arm/mach-clps711x/board-p720t.c
index 67b7337..0cf0e51 100644
--- a/arch/arm/mach-clps711x/board-p720t.c
+++ b/arch/arm/mach-clps711x/board-p720t.c
@@ -295,7 +295,7 @@
 };
 
 static void __init
-fixup_p720t(struct tag *tag, char **cmdline, struct meminfo *mi)
+fixup_p720t(struct tag *tag, char **cmdline)
 {
 	/*
 	 * Our bootloader doesn't setup any tags (yet).
diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c
index 2ae28a6..f85449a 100644
--- a/arch/arm/mach-cns3xxx/core.c
+++ b/arch/arm/mach-cns3xxx/core.c
@@ -272,9 +272,9 @@
 	 *
 	 * 1 cycle of latency for setup, read and write accesses
 	 */
-	val = readl(base + L2X0_TAG_LATENCY_CTRL);
+	val = readl(base + L310_TAG_LATENCY_CTRL);
 	val &= 0xfffff888;
-	writel(val, base + L2X0_TAG_LATENCY_CTRL);
+	writel(val, base + L310_TAG_LATENCY_CTRL);
 
 	/*
 	 * Data RAM Control register
@@ -285,12 +285,12 @@
 	 *
 	 * 1 cycle of latency for setup, read and write accesses
 	 */
-	val = readl(base + L2X0_DATA_LATENCY_CTRL);
+	val = readl(base + L310_DATA_LATENCY_CTRL);
 	val &= 0xfffff888;
-	writel(val, base + L2X0_DATA_LATENCY_CTRL);
+	writel(val, base + L310_DATA_LATENCY_CTRL);
 
 	/* 32 KiB, 8-way, parity disable */
-	l2x0_init(base, 0x00540000, 0xfe000fff);
+	l2x0_init(base, 0x00500000, 0xfe0f0fff);
 }
 
 #endif /* CONFIG_CACHE_L2X0 */
diff --git a/arch/arm/mach-ep93xx/crunch-bits.S b/arch/arm/mach-ep93xx/crunch-bits.S
index 0ec9bb4..e96923a 100644
--- a/arch/arm/mach-ep93xx/crunch-bits.S
+++ b/arch/arm/mach-ep93xx/crunch-bits.S
@@ -16,6 +16,7 @@
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
+#include <asm/assembler.h>
 #include <mach/ep93xx-regs.h>
 
 /*
@@ -62,14 +63,16 @@
  * r9  = ret_from_exception
  * lr  = undefined instr exit
  *
- * called from prefetch exception handler with interrupts disabled
+ * called from prefetch exception handler with interrupts enabled
  */
 ENTRY(crunch_task_enable)
+	inc_preempt_count r10, r3
+
 	ldr	r8, =(EP93XX_APB_VIRT_BASE + 0x00130000)	@ syscon addr
 
 	ldr	r1, [r8, #0x80]
 	tst	r1, #0x00800000			@ access to crunch enabled?
-	movne	pc, lr				@ if so no business here
+	bne	2f				@ if so no business here
 	mov	r3, #0xaa			@ unlock syscon swlock
 	str	r3, [r8, #0xc0]
 	orr	r1, r1, #0x00800000		@ enable access to crunch
@@ -142,7 +145,7 @@
 
 	teq		r0, #0				@ anything to load?
 	cfldr64eq	mvdx0, [r1, #CRUNCH_MVDX0]	@ mvdx0 was clobbered
-	moveq		pc, lr
+	beq		1f
 
 crunch_load:
 	cfldr64		mvdx0, [r0, #CRUNCH_DSPSC]	@ load status word
@@ -190,6 +193,11 @@
 	cfldr64		mvdx14, [r0, #CRUNCH_MVDX14]
 	cfldr64		mvdx15, [r0, #CRUNCH_MVDX15]
 
+1:
+#ifdef CONFIG_PREEMPT_COUNT
+	get_thread_info r10
+#endif
+2:	dec_preempt_count r10, r3
 	mov	pc, lr
 
 /*
diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h
index 9ef3f83..88c619d 100644
--- a/arch/arm/mach-exynos/common.h
+++ b/arch/arm/mach-exynos/common.h
@@ -55,7 +55,6 @@
 	NUM_SYS_POWERDOWN,
 };
 
-extern unsigned long l2x0_regs_phys;
 struct exynos_pmu_conf {
 	void __iomem *reg;
 	unsigned int val[NUM_SYS_POWERDOWN];
diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c
index b32a907..a763c08 100644
--- a/arch/arm/mach-exynos/exynos.c
+++ b/arch/arm/mach-exynos/exynos.c
@@ -32,9 +32,6 @@
 #include "mfc.h"
 #include "regs-pmu.h"
 
-#define L2_AUX_VAL 0x7C470001
-#define L2_AUX_MASK 0xC200ffff
-
 static struct map_desc exynos4_iodesc[] __initdata = {
 	{
 		.virtual	= (unsigned long)S3C_VA_SYS,
@@ -319,22 +316,6 @@
 }
 core_initcall(exynos_core_init);
 
-static int __init exynos4_l2x0_cache_init(void)
-{
-	int ret;
-
-	ret = l2x0_of_init(L2_AUX_VAL, L2_AUX_MASK);
-	if (ret)
-		return ret;
-
-	if (IS_ENABLED(CONFIG_S5P_SLEEP)) {
-		l2x0_regs_phys = virt_to_phys(&l2x0_saved_regs);
-		clean_dcache_area(&l2x0_regs_phys, sizeof(unsigned long));
-	}
-	return 0;
-}
-early_initcall(exynos4_l2x0_cache_init);
-
 static void __init exynos_dt_machine_init(void)
 {
 	struct device_node *i2c_np;
@@ -400,6 +381,8 @@
 DT_MACHINE_START(EXYNOS_DT, "SAMSUNG EXYNOS (Flattened Device Tree)")
 	/* Maintainer: Thomas Abraham <thomas.abraham@linaro.org> */
 	/* Maintainer: Kukjin Kim <kgene.kim@samsung.com> */
+	.l2c_aux_val	= 0x3c400001,
+	.l2c_aux_mask	= 0xc20fffff,
 	.smp		= smp_ops(exynos_smp_ops),
 	.map_io		= exynos_init_io,
 	.init_early	= exynos_firmware_init,
diff --git a/arch/arm/mach-exynos/sleep.S b/arch/arm/mach-exynos/sleep.S
index a2613e9..108a45f 100644
--- a/arch/arm/mach-exynos/sleep.S
+++ b/arch/arm/mach-exynos/sleep.S
@@ -16,8 +16,6 @@
  */
 
 #include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/hardware/cache-l2x0.h>
 
 #define CPU_MASK	0xff0ffff0
 #define CPU_CORTEX_A9	0x410fc090
@@ -53,33 +51,7 @@
 	and	r0, r0, r1
 	ldr	r1, =CPU_CORTEX_A9
 	cmp	r0, r1
-	bne	skip_l2_resume
-	adr	r0, l2x0_regs_phys
-	ldr	r0, [r0]
-	cmp	r0, #0
-	beq	skip_l2_resume
-	ldr	r1, [r0, #L2X0_R_PHY_BASE]
-	ldr	r2, [r1, #L2X0_CTRL]
-	tst	r2, #0x1
-	bne	skip_l2_resume
-	ldr	r2, [r0, #L2X0_R_AUX_CTRL]
-	str	r2, [r1, #L2X0_AUX_CTRL]
-	ldr	r2, [r0, #L2X0_R_TAG_LATENCY]
-	str	r2, [r1, #L2X0_TAG_LATENCY_CTRL]
-	ldr	r2, [r0, #L2X0_R_DATA_LATENCY]
-	str	r2, [r1, #L2X0_DATA_LATENCY_CTRL]
-	ldr	r2, [r0, #L2X0_R_PREFETCH_CTRL]
-	str	r2, [r1, #L2X0_PREFETCH_CTRL]
-	ldr	r2, [r0, #L2X0_R_PWR_CTRL]
-	str	r2, [r1, #L2X0_POWER_CTRL]
-	mov	r2, #1
-	str	r2, [r1, #L2X0_CTRL]
-skip_l2_resume:
+	bleq	l2c310_early_resume
 #endif
 	b	cpu_resume
 ENDPROC(exynos_cpu_resume)
-#ifdef CONFIG_CACHE_L2X0
-	.globl l2x0_regs_phys
-l2x0_regs_phys:
-	.long	0
-#endif
diff --git a/arch/arm/mach-footbridge/cats-hw.c b/arch/arm/mach-footbridge/cats-hw.c
index da04150..8f05489 100644
--- a/arch/arm/mach-footbridge/cats-hw.c
+++ b/arch/arm/mach-footbridge/cats-hw.c
@@ -76,7 +76,7 @@
  * hard reboots fail on early boards.
  */
 static void __init
-fixup_cats(struct tag *tags, char **cmdline, struct meminfo *mi)
+fixup_cats(struct tag *tags, char **cmdline)
 {
 #if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE)
 	screen_info.orig_video_lines  = 25;
diff --git a/arch/arm/mach-footbridge/netwinder-hw.c b/arch/arm/mach-footbridge/netwinder-hw.c
index eb1fa5c..cdee08c 100644
--- a/arch/arm/mach-footbridge/netwinder-hw.c
+++ b/arch/arm/mach-footbridge/netwinder-hw.c
@@ -620,7 +620,7 @@
  * the parameter page.
  */
 static void __init
-fixup_netwinder(struct tag *tags, char **cmdline, struct meminfo *mi)
+fixup_netwinder(struct tag *tags, char **cmdline)
 {
 #ifdef CONFIG_ISAPNP
 	extern int isapnp_disable;
diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c
index c7de89b..8c35ae4 100644
--- a/arch/arm/mach-highbank/highbank.c
+++ b/arch/arm/mach-highbank/highbank.c
@@ -51,11 +51,13 @@
 }
 
 
-static void highbank_l2x0_disable(void)
+static void highbank_l2c310_write_sec(unsigned long val, unsigned reg)
 {
-	outer_flush_all();
-	/* Disable PL310 L2 Cache controller */
-	highbank_smc1(0x102, 0x0);
+	if (reg == L2X0_CTRL)
+		highbank_smc1(0x102, val);
+	else
+		WARN_ONCE(1, "Highbank L2C310: ignoring write to reg 0x%x\n",
+			  reg);
 }
 
 static void __init highbank_init_irq(void)
@@ -64,14 +66,6 @@
 
 	if (of_find_compatible_node(NULL, NULL, "arm,cortex-a9"))
 		highbank_scu_map_io();
-
-	/* Enable PL310 L2 Cache controller */
-	if (IS_ENABLED(CONFIG_CACHE_L2X0) &&
-	    of_find_compatible_node(NULL, NULL, "arm,pl310-cache")) {
-		highbank_smc1(0x102, 0x1);
-		l2x0_of_init(0, ~0UL);
-		outer_cache.disable = highbank_l2x0_disable;
-	}
 }
 
 static void highbank_power_off(void)
@@ -185,6 +179,9 @@
 #if defined(CONFIG_ZONE_DMA) && defined(CONFIG_ARM_LPAE)
 	.dma_zone_size	= (4ULL * SZ_1G),
 #endif
+	.l2c_aux_val	= 0,
+	.l2c_aux_mask	= ~0,
+	.l2c_write_sec	= highbank_l2c310_write_sec,
 	.init_irq	= highbank_init_irq,
 	.init_machine	= highbank_init,
 	.dt_compat	= highbank_match,
diff --git a/arch/arm/mach-imx/mach-vf610.c b/arch/arm/mach-imx/mach-vf610.c
index 2d8aef5..c446027 100644
--- a/arch/arm/mach-imx/mach-vf610.c
+++ b/arch/arm/mach-imx/mach-vf610.c
@@ -20,19 +20,14 @@
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 }
 
-static void __init vf610_init_irq(void)
-{
-	l2x0_of_init(0, ~0UL);
-	irqchip_init();
-}
-
 static const char *vf610_dt_compat[] __initconst = {
 	"fsl,vf610",
 	NULL,
 };
 
 DT_MACHINE_START(VYBRID_VF610, "Freescale Vybrid VF610 (Device Tree)")
-	.init_irq	= vf610_init_irq,
+	.l2c_aux_val	= 0,
+	.l2c_aux_mask	= ~0,
 	.init_machine   = vf610_init_machine,
 	.dt_compat	= vf610_dt_compat,
 	.restart	= mxc_restart,
diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S
index 20048ff..fe123b0 100644
--- a/arch/arm/mach-imx/suspend-imx6.S
+++ b/arch/arm/mach-imx/suspend-imx6.S
@@ -334,28 +334,10 @@
  * turned into relative ones.
  */
 
-#ifdef CONFIG_CACHE_L2X0
-	.macro	pl310_resume
-	adr	r0, l2x0_saved_regs_offset
-	ldr	r2, [r0]
-	add	r2, r2, r0
-	ldr	r0, [r2, #L2X0_R_PHY_BASE]	@ get physical base of l2x0
-	ldr	r1, [r2, #L2X0_R_AUX_CTRL]	@ get aux_ctrl value
-	str	r1, [r0, #L2X0_AUX_CTRL]	@ restore aux_ctrl
-	mov	r1, #0x1
-	str	r1, [r0, #L2X0_CTRL]		@ re-enable L2
-	.endm
-
-l2x0_saved_regs_offset:
-	.word	l2x0_saved_regs - .
-
-#else
-	.macro	pl310_resume
-	.endm
-#endif
-
 ENTRY(v7_cpu_resume)
 	bl	v7_invalidate_l1
-	pl310_resume
+#ifdef CONFIG_CACHE_L2X0
+	bl	l2c310_early_resume
+#endif
 	b	cpu_resume
 ENDPROC(v7_cpu_resume)
diff --git a/arch/arm/mach-imx/system.c b/arch/arm/mach-imx/system.c
index 5e3027d..3b0733e 100644
--- a/arch/arm/mach-imx/system.c
+++ b/arch/arm/mach-imx/system.c
@@ -124,7 +124,7 @@
 	}
 
 	/* Configure the L2 PREFETCH and POWER registers */
-	val = readl_relaxed(l2x0_base + L2X0_PREFETCH_CTRL);
+	val = readl_relaxed(l2x0_base + L310_PREFETCH_CTRL);
 	val |= 0x70800000;
 	/*
 	 * The L2 cache controller(PL310) version on the i.MX6D/Q is r3p1-50rel0
@@ -137,14 +137,12 @@
 	 */
 	if (cpu_is_imx6q())
 		val &= ~(1 << 30 | 1 << 23);
-	writel_relaxed(val, l2x0_base + L2X0_PREFETCH_CTRL);
-	val = L2X0_DYNAMIC_CLK_GATING_EN | L2X0_STNDBY_MODE_EN;
-	writel_relaxed(val, l2x0_base + L2X0_POWER_CTRL);
+	writel_relaxed(val, l2x0_base + L310_PREFETCH_CTRL);
 
 	iounmap(l2x0_base);
 	of_node_put(np);
 
 out:
-	l2x0_of_init(0, ~0UL);
+	l2x0_of_init(0, ~0);
 }
 #endif
diff --git a/arch/arm/mach-msm/board-halibut.c b/arch/arm/mach-msm/board-halibut.c
index a775298..61bfe58 100644
--- a/arch/arm/mach-msm/board-halibut.c
+++ b/arch/arm/mach-msm/board-halibut.c
@@ -83,11 +83,6 @@
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 }
 
-static void __init halibut_fixup(struct tag *tags, char **cmdline,
-				 struct meminfo *mi)
-{
-}
-
 static void __init halibut_map_io(void)
 {
 	msm_map_common_io();
@@ -100,7 +95,6 @@
 
 MACHINE_START(HALIBUT, "Halibut Board (QCT SURF7200A)")
 	.atag_offset	= 0x100,
-	.fixup		= halibut_fixup,
 	.map_io		= halibut_map_io,
 	.init_early	= halibut_init_early,
 	.init_irq	= halibut_init_irq,
diff --git a/arch/arm/mach-msm/board-mahimahi.c b/arch/arm/mach-msm/board-mahimahi.c
index 7d9981c..873c3ca 100644
--- a/arch/arm/mach-msm/board-mahimahi.c
+++ b/arch/arm/mach-msm/board-mahimahi.c
@@ -22,6 +22,7 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
+#include <linux/memblock.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -52,16 +53,10 @@
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 }
 
-static void __init mahimahi_fixup(struct tag *tags, char **cmdline,
-				  struct meminfo *mi)
+static void __init mahimahi_fixup(struct tag *tags, char **cmdline)
 {
-	mi->nr_banks = 2;
-	mi->bank[0].start = PHYS_OFFSET;
-	mi->bank[0].node = PHYS_TO_NID(PHYS_OFFSET);
-	mi->bank[0].size = (219*1024*1024);
-	mi->bank[1].start = MSM_HIGHMEM_BASE;
-	mi->bank[1].node = PHYS_TO_NID(MSM_HIGHMEM_BASE);
-	mi->bank[1].size = MSM_HIGHMEM_SIZE;
+	memblock_add(PHYS_OFFSET, 219*SZ_1M);
+	memblock_add(MSM_HIGHMEM_BASE, MSM_HIGHMEM_SIZE);
 }
 
 static void __init mahimahi_map_io(void)
diff --git a/arch/arm/mach-msm/board-msm7x30.c b/arch/arm/mach-msm/board-msm7x30.c
index 46de789..b621b23 100644
--- a/arch/arm/mach-msm/board-msm7x30.c
+++ b/arch/arm/mach-msm/board-msm7x30.c
@@ -40,8 +40,7 @@
 #include "proc_comm.h"
 #include "common.h"
 
-static void __init msm7x30_fixup(struct tag *tag, char **cmdline,
-		struct meminfo *mi)
+static void __init msm7x30_fixup(struct tag *tag, char **cmdline)
 {
 	for (; tag->hdr.size; tag = tag_next(tag))
 		if (tag->hdr.tag == ATAG_MEM && tag->u.mem.start == 0x200000) {
diff --git a/arch/arm/mach-msm/board-sapphire.c b/arch/arm/mach-msm/board-sapphire.c
index 3276051..e509679 100644
--- a/arch/arm/mach-msm/board-sapphire.c
+++ b/arch/arm/mach-msm/board-sapphire.c
@@ -35,6 +35,7 @@
 
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
+#include <linux/memblock.h>
 
 #include "gpio_chip.h"
 #include "board-sapphire.h"
@@ -74,22 +75,18 @@
 	}
 };
 
-static void __init sapphire_fixup(struct tag *tags, char **cmdline,
-				  struct meminfo *mi)
+static void __init sapphire_fixup(struct tag *tags, char **cmdline)
 {
 	int smi_sz = parse_tag_smi((const struct tag *)tags);
 
-	mi->nr_banks = 1;
-	mi->bank[0].start = PHYS_OFFSET;
-	mi->bank[0].node = PHYS_TO_NID(PHYS_OFFSET);
 	if (smi_sz == 32) {
-		mi->bank[0].size = (84*1024*1024);
+		memblock_add(PHYS_OFFSET, 84*SZ_1M);
 	} else if (smi_sz == 64) {
-		mi->bank[0].size = (101*1024*1024);
+		memblock_add(PHYS_OFFSET, 101*SZ_1M);
 	} else {
+		memblock_add(PHYS_OFFSET, 101*SZ_1M);
 		/* Give a default value when not get smi size */
 		smi_sz = 64;
-		mi->bank[0].size = (101*1024*1024);
 	}
 }
 
diff --git a/arch/arm/mach-msm/board-trout.c b/arch/arm/mach-msm/board-trout.c
index 015d544..58826cf 100644
--- a/arch/arm/mach-msm/board-trout.c
+++ b/arch/arm/mach-msm/board-trout.c
@@ -19,6 +19,7 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/clkdev.h>
+#include <linux/memblock.h>
 
 #include <asm/system_info.h>
 #include <asm/mach-types.h>
@@ -55,12 +56,9 @@
 	msm_init_irq();
 }
 
-static void __init trout_fixup(struct tag *tags, char **cmdline,
-			       struct meminfo *mi)
+static void __init trout_fixup(struct tag *tags, char **cmdline)
 {
-	mi->nr_banks = 1;
-	mi->bank[0].start = PHYS_OFFSET;
-	mi->bank[0].size = (101*1024*1024);
+	memblock_add(PHYS_OFFSET, 101*SZ_1M);
 }
 
 static void __init trout_init(void)
diff --git a/arch/arm/mach-mvebu/board-v7.c b/arch/arm/mach-mvebu/board-v7.c
index 333fca8..48169ca 100644
--- a/arch/arm/mach-mvebu/board-v7.c
+++ b/arch/arm/mach-mvebu/board-v7.c
@@ -59,9 +59,6 @@
 	clocksource_of_init();
 	coherency_init();
 	BUG_ON(mvebu_mbus_dt_init());
-#ifdef CONFIG_CACHE_L2X0
-	l2x0_of_init(0, ~0UL);
-#endif
 
 	if (of_machine_is_compatible("marvell,armada375"))
 		hook_fault_code(16 + 6, armada_375_external_abort_wa, SIGBUS, 0,
@@ -109,6 +106,8 @@
 };
 
 DT_MACHINE_START(ARMADA_370_XP_DT, "Marvell Armada 370/XP (Device Tree)")
+	.l2c_aux_val	= 0,
+	.l2c_aux_mask	= ~0,
 	.smp		= smp_ops(armada_xp_smp_ops),
 	.init_machine	= mvebu_dt_init,
 	.init_time	= mvebu_timer_and_clk_init,
@@ -122,6 +121,8 @@
 };
 
 DT_MACHINE_START(ARMADA_375_DT, "Marvell Armada 375 (Device Tree)")
+	.l2c_aux_val	= 0,
+	.l2c_aux_mask	= ~0,
 	.init_time	= mvebu_timer_and_clk_init,
 	.restart	= mvebu_restart,
 	.dt_compat	= armada_375_dt_compat,
@@ -134,6 +135,8 @@
 };
 
 DT_MACHINE_START(ARMADA_38X_DT, "Marvell Armada 380/385 (Device Tree)")
+	.l2c_aux_val	= 0,
+	.l2c_aux_mask	= ~0,
 	.init_time	= mvebu_timer_and_clk_init,
 	.restart	= mvebu_restart,
 	.dt_compat	= armada_38x_dt_compat,
diff --git a/arch/arm/mach-nomadik/cpu-8815.c b/arch/arm/mach-nomadik/cpu-8815.c
index 4a1065e..9116ca4 100644
--- a/arch/arm/mach-nomadik/cpu-8815.c
+++ b/arch/arm/mach-nomadik/cpu-8815.c
@@ -143,23 +143,16 @@
 }
 device_initcall(cpu8815_mmcsd_init);
 
-static void __init cpu8815_init_of(void)
-{
-#ifdef CONFIG_CACHE_L2X0
-	/* At full speed latency must be >=2, so 0x249 in low bits */
-	l2x0_of_init(0x00730249, 0xfe000fff);
-#endif
-	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
-
 static const char * cpu8815_board_compat[] = {
 	"calaosystems,usb-s8815",
 	NULL,
 };
 
 DT_MACHINE_START(NOMADIK_DT, "Nomadik STn8815")
+	/* At full speed latency must be >=2, so 0x249 in low bits */
+	.l2c_aux_val	= 0x00700249,
+	.l2c_aux_mask	= 0xfe0fefff,
 	.map_io		= cpu8815_map_io,
-	.init_machine	= cpu8815_init_of,
 	.restart	= cpu8815_restart,
 	.dt_compat      = cpu8815_board_compat,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index cb31d43..0ba4826 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -65,6 +65,7 @@
 	select ARCH_HAS_OPP
 	select ARM_GIC
 	select MACH_OMAP_GENERIC
+	select MIGHT_HAVE_CACHE_L2X0
 
 config SOC_DRA7XX
 	bool "TI DRA7XX"
diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h
index d88aff7..ff02973 100644
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@@ -91,6 +91,7 @@
 extern void omap3_secure_sync32k_timer_init(void);
 extern void omap3_gptimer_timer_init(void);
 extern void omap4_local_timer_init(void);
+int omap_l2_cache_init(void);
 extern void omap5_realtime_timer_init(void);
 
 void omap2420_init_early(void);
diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index f14f9ac..4e2df49 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c
@@ -609,6 +609,7 @@
 	am43xx_clockdomains_init();
 	am43xx_hwmod_init();
 	omap_hwmod_init_postsetup();
+	omap_l2_cache_init();
 	omap_clk_soc_init = am43xx_dt_clk_init;
 }
 
@@ -640,6 +641,7 @@
 	omap44xx_clockdomains_init();
 	omap44xx_hwmod_init();
 	omap_hwmod_init_postsetup();
+	omap_l2_cache_init();
 	omap_clk_soc_init = omap4xxx_dt_clk_init;
 }
 
diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
index 667915d..61cb77f 100644
--- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c
+++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
@@ -187,19 +187,15 @@
  * in every restore MPUSS OFF path.
  */
 #ifdef CONFIG_CACHE_L2X0
-static void save_l2x0_context(void)
+static void __init save_l2x0_context(void)
 {
-	u32 val;
-	void __iomem *l2x0_base = omap4_get_l2cache_base();
-	if (l2x0_base) {
-		val = __raw_readl(l2x0_base + L2X0_AUX_CTRL);
-		__raw_writel(val, sar_base + L2X0_AUXCTRL_OFFSET);
-		val = __raw_readl(l2x0_base + L2X0_PREFETCH_CTRL);
-		__raw_writel(val, sar_base + L2X0_PREFETCH_CTRL_OFFSET);
-	}
+	__raw_writel(l2x0_saved_regs.aux_ctrl,
+		     sar_base + L2X0_AUXCTRL_OFFSET);
+	__raw_writel(l2x0_saved_regs.prefetch_ctrl,
+		     sar_base + L2X0_PREFETCH_CTRL_OFFSET);
 }
 #else
-static void save_l2x0_context(void)
+static void __init save_l2x0_context(void)
 {}
 #endif
 
diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c
index 95e171a..c41ff8b 100644
--- a/arch/arm/mach-omap2/omap4-common.c
+++ b/arch/arm/mach-omap2/omap4-common.c
@@ -167,75 +167,57 @@
 	return l2cache_base;
 }
 
-static void omap4_l2x0_disable(void)
+static void omap4_l2c310_write_sec(unsigned long val, unsigned reg)
 {
-	outer_flush_all();
-	/* Disable PL310 L2 Cache controller */
-	omap_smc1(0x102, 0x0);
+	unsigned smc_op;
+
+	switch (reg) {
+	case L2X0_CTRL:
+		smc_op = OMAP4_MON_L2X0_CTRL_INDEX;
+		break;
+
+	case L2X0_AUX_CTRL:
+		smc_op = OMAP4_MON_L2X0_AUXCTRL_INDEX;
+		break;
+
+	case L2X0_DEBUG_CTRL:
+		smc_op = OMAP4_MON_L2X0_DBG_CTRL_INDEX;
+		break;
+
+	case L310_PREFETCH_CTRL:
+		smc_op = OMAP4_MON_L2X0_PREFETCH_INDEX;
+		break;
+
+	default:
+		WARN_ONCE(1, "OMAP L2C310: ignoring write to reg 0x%x\n", reg);
+		return;
+	}
+
+	omap_smc1(smc_op, val);
 }
 
-static void omap4_l2x0_set_debug(unsigned long val)
+int __init omap_l2_cache_init(void)
 {
-	/* Program PL310 L2 Cache controller debug register */
-	omap_smc1(0x100, val);
-}
-
-static int __init omap_l2_cache_init(void)
-{
-	u32 aux_ctrl = 0;
-
-	/*
-	 * To avoid code running on other OMAPs in
-	 * multi-omap builds
-	 */
-	if (!cpu_is_omap44xx())
-		return -ENODEV;
+	u32 aux_ctrl;
 
 	/* Static mapping, never released */
 	l2cache_base = ioremap(OMAP44XX_L2CACHE_BASE, SZ_4K);
 	if (WARN_ON(!l2cache_base))
 		return -ENOMEM;
 
-	/*
-	 * 16-way associativity, parity disabled
-	 * Way size - 32KB (es1.0)
-	 * Way size - 64KB (es2.0 +)
-	 */
-	aux_ctrl = ((1 << L2X0_AUX_CTRL_ASSOCIATIVITY_SHIFT) |
-			(0x1 << 25) |
-			(0x1 << L2X0_AUX_CTRL_NS_LOCKDOWN_SHIFT) |
-			(0x1 << L2X0_AUX_CTRL_NS_INT_CTRL_SHIFT));
+	/* 16-way associativity, parity disabled, way size - 64KB (es2.0 +) */
+	aux_ctrl = L2C_AUX_CTRL_SHARED_OVERRIDE |
+		   L310_AUX_CTRL_DATA_PREFETCH |
+		   L310_AUX_CTRL_INSTR_PREFETCH;
 
-	if (omap_rev() == OMAP4430_REV_ES1_0) {
-		aux_ctrl |= 0x2 << L2X0_AUX_CTRL_WAY_SIZE_SHIFT;
-	} else {
-		aux_ctrl |= ((0x3 << L2X0_AUX_CTRL_WAY_SIZE_SHIFT) |
-			(1 << L2X0_AUX_CTRL_SHARE_OVERRIDE_SHIFT) |
-			(1 << L2X0_AUX_CTRL_DATA_PREFETCH_SHIFT) |
-			(1 << L2X0_AUX_CTRL_INSTR_PREFETCH_SHIFT) |
-			(1 << L2X0_AUX_CTRL_EARLY_BRESP_SHIFT));
-	}
-	if (omap_rev() != OMAP4430_REV_ES1_0)
-		omap_smc1(0x109, aux_ctrl);
-
-	/* Enable PL310 L2 Cache controller */
-	omap_smc1(0x102, 0x1);
-
+	outer_cache.write_sec = omap4_l2c310_write_sec;
 	if (of_have_populated_dt())
-		l2x0_of_init(aux_ctrl, L2X0_AUX_CTRL_MASK);
+		l2x0_of_init(aux_ctrl, 0xcf9fffff);
 	else
-		l2x0_init(l2cache_base, aux_ctrl, L2X0_AUX_CTRL_MASK);
-
-	/*
-	 * Override default outer_cache.disable with a OMAP4
-	 * specific one
-	*/
-	outer_cache.disable = omap4_l2x0_disable;
-	outer_cache.set_debug = omap4_l2x0_set_debug;
+		l2x0_init(l2cache_base, aux_ctrl, 0xcf9fffff);
 
 	return 0;
 }
-omap_early_initcall(omap_l2_cache_init);
 #endif
 
 void __iomem *omap4_get_sar_ram_base(void)
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index 3f1de11..6bbb7b5 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -365,8 +365,7 @@
  * Many orion-based systems have buggy bootloader implementations.
  * This is a common fixup for bogus memory tags.
  */
-void __init tag_fixup_mem32(struct tag *t, char **from,
-			    struct meminfo *meminfo)
+void __init tag_fixup_mem32(struct tag *t, char **from)
 {
 	for (; t->hdr.size; t = tag_next(t))
 		if (t->hdr.tag == ATAG_MEM &&
diff --git a/arch/arm/mach-orion5x/common.h b/arch/arm/mach-orion5x/common.h
index 7548db2..ca38030 100644
--- a/arch/arm/mach-orion5x/common.h
+++ b/arch/arm/mach-orion5x/common.h
@@ -71,9 +71,8 @@
 static inline void edmini_v2_init(void) {};
 #endif
 
-struct meminfo;
 struct tag;
-extern void __init tag_fixup_mem32(struct tag *, char **, struct meminfo *);
+extern void __init tag_fixup_mem32(struct tag *, char **);
 
 /*****************************************************************************
  * Helpers to access Orion registers
diff --git a/arch/arm/mach-prima2/Makefile b/arch/arm/mach-prima2/Makefile
index 7a6b4a3..8846e7d 100644
--- a/arch/arm/mach-prima2/Makefile
+++ b/arch/arm/mach-prima2/Makefile
@@ -2,7 +2,6 @@
 obj-y += common.o
 obj-y += rtciobrg.o
 obj-$(CONFIG_DEBUG_LL) += lluart.o
-obj-$(CONFIG_CACHE_L2X0) += l2x0.o
 obj-$(CONFIG_SUSPEND) += pm.o sleep.o
 obj-$(CONFIG_SMP) += platsmp.o headsmp.o
 obj-$(CONFIG_HOTPLUG_CPU)  += hotplug.o
diff --git a/arch/arm/mach-prima2/common.c b/arch/arm/mach-prima2/common.c
index 47c7819..a860ea2 100644
--- a/arch/arm/mach-prima2/common.c
+++ b/arch/arm/mach-prima2/common.c
@@ -34,6 +34,8 @@
 
 DT_MACHINE_START(ATLAS6_DT, "Generic ATLAS6 (Flattened Device Tree)")
 	/* Maintainer: Barry Song <baohua.song@csr.com> */
+	.l2c_aux_val	= 0,
+	.l2c_aux_mask	= ~0,
 	.map_io         = sirfsoc_map_io,
 	.init_late	= sirfsoc_init_late,
 	.dt_compat      = atlas6_dt_match,
@@ -48,6 +50,8 @@
 
 DT_MACHINE_START(PRIMA2_DT, "Generic PRIMA2 (Flattened Device Tree)")
 	/* Maintainer: Barry Song <baohua.song@csr.com> */
+	.l2c_aux_val	= 0,
+	.l2c_aux_mask	= ~0,
 	.map_io         = sirfsoc_map_io,
 	.dma_zone_size	= SZ_256M,
 	.init_late	= sirfsoc_init_late,
@@ -63,6 +67,8 @@
 
 DT_MACHINE_START(MARCO_DT, "Generic MARCO (Flattened Device Tree)")
 	/* Maintainer: Barry Song <baohua.song@csr.com> */
+	.l2c_aux_val	= 0,
+	.l2c_aux_mask	= ~0,
 	.smp            = smp_ops(sirfsoc_smp_ops),
 	.map_io         = sirfsoc_map_io,
 	.init_late	= sirfsoc_init_late,
diff --git a/arch/arm/mach-prima2/l2x0.c b/arch/arm/mach-prima2/l2x0.c
deleted file mode 100644
index c710253..0000000
--- a/arch/arm/mach-prima2/l2x0.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * l2 cache initialization for CSR SiRFprimaII
- *
- * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company.
- *
- * Licensed under GPLv2 or later.
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <asm/hardware/cache-l2x0.h>
-
-struct l2x0_aux {
-	u32 val;
-	u32 mask;
-};
-
-static const struct l2x0_aux prima2_l2x0_aux __initconst = {
-	.val = 2 << L2X0_AUX_CTRL_WAY_SIZE_SHIFT,
-	.mask =	0,
-};
-
-static const struct l2x0_aux marco_l2x0_aux __initconst = {
-	.val = (2 << L2X0_AUX_CTRL_WAY_SIZE_SHIFT) |
-		(1 << L2X0_AUX_CTRL_ASSOCIATIVITY_SHIFT),
-	.mask = L2X0_AUX_CTRL_MASK,
-};
-
-static const struct of_device_id sirf_l2x0_ids[] __initconst = {
-	{ .compatible = "sirf,prima2-pl310-cache", .data = &prima2_l2x0_aux, },
-	{ .compatible = "sirf,marco-pl310-cache", .data = &marco_l2x0_aux, },
-	{},
-};
-
-static int __init sirfsoc_l2x0_init(void)
-{
-	struct device_node *np;
-	const struct l2x0_aux *aux;
-
-	np = of_find_matching_node(NULL, sirf_l2x0_ids);
-	if (np) {
-		aux = of_match_node(sirf_l2x0_ids, np)->data;
-		return l2x0_of_init(aux->val, aux->mask);
-	}
-
-	return 0;
-}
-early_initcall(sirfsoc_l2x0_init);
diff --git a/arch/arm/mach-prima2/pm.c b/arch/arm/mach-prima2/pm.c
index c4525a8..96e9bc1 100644
--- a/arch/arm/mach-prima2/pm.c
+++ b/arch/arm/mach-prima2/pm.c
@@ -71,7 +71,6 @@
 	case PM_SUSPEND_MEM:
 		sirfsoc_pre_suspend_power_off();
 
-		outer_flush_all();
 		outer_disable();
 		/* go zzz */
 		cpu_suspend(0, sirfsoc_finish_suspend);
diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c
index 584439b..4d3588d 100644
--- a/arch/arm/mach-pxa/cm-x300.c
+++ b/arch/arm/mach-pxa/cm-x300.c
@@ -837,8 +837,7 @@
 	cm_x300_init_bl();
 }
 
-static void __init cm_x300_fixup(struct tag *tags, char **cmdline,
-				 struct meminfo *mi)
+static void __init cm_x300_fixup(struct tag *tags, char **cmdline)
 {
 	/* Make sure that mi->bank[0].start = PHYS_ADDR */
 	for (; tags->hdr.size; tags = tag_next(tags))
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index 57d6054..91dd1c7 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -34,6 +34,7 @@
 #include <linux/input/matrix_keypad.h>
 #include <linux/gpio_keys.h>
 #include <linux/module.h>
+#include <linux/memblock.h>
 #include <video/w100fb.h>
 
 #include <asm/setup.h>
@@ -753,16 +754,13 @@
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 }
 
-static void __init fixup_corgi(struct tag *tags, char **cmdline,
-			       struct meminfo *mi)
+static void __init fixup_corgi(struct tag *tags, char **cmdline)
 {
 	sharpsl_save_param();
-	mi->nr_banks=1;
-	mi->bank[0].start = 0xa0000000;
 	if (machine_is_corgi())
-		mi->bank[0].size = (32*1024*1024);
+		memblock_add(0xa0000000, SZ_32M);
 	else
-		mi->bank[0].size = (64*1024*1024);
+		memblock_add(0xa0000000, SZ_64M);
 }
 
 #ifdef CONFIG_MACH_CORGI
diff --git a/arch/arm/mach-pxa/eseries.c b/arch/arm/mach-pxa/eseries.c
index 8280ebc..cfb8641 100644
--- a/arch/arm/mach-pxa/eseries.c
+++ b/arch/arm/mach-pxa/eseries.c
@@ -21,6 +21,7 @@
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/usb/gpio_vbus.h>
+#include <linux/memblock.h>
 
 #include <video/w100fb.h>
 
@@ -41,14 +42,12 @@
 #include "clock.h"
 
 /* Only e800 has 128MB RAM */
-void __init eseries_fixup(struct tag *tags, char **cmdline, struct meminfo *mi)
+void __init eseries_fixup(struct tag *tags, char **cmdline)
 {
-	mi->nr_banks=1;
-	mi->bank[0].start = 0xa0000000;
 	if (machine_is_e800())
-		mi->bank[0].size = (128*1024*1024);
+		memblock_add(0xa0000000, SZ_128M);
 	else
-		mi->bank[0].size = (64*1024*1024);
+		memblock_add(0xa0000000, SZ_64M);
 }
 
 struct gpio_vbus_mach_info e7xx_udc_info = {
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index aedf053..1319916 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -29,6 +29,7 @@
 #include <linux/spi/ads7846.h>
 #include <linux/spi/pxa2xx_spi.h>
 #include <linux/mtd/sharpsl.h>
+#include <linux/memblock.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
@@ -456,13 +457,10 @@
 	poodle_init_spi();
 }
 
-static void __init fixup_poodle(struct tag *tags, char **cmdline,
-				struct meminfo *mi)
+static void __init fixup_poodle(struct tag *tags, char **cmdline)
 {
 	sharpsl_save_param();
-	mi->nr_banks=1;
-	mi->bank[0].start = 0xa0000000;
-	mi->bank[0].size = (32*1024*1024);
+	memblock_add(0xa0000000, SZ_32M);
 }
 
 MACHINE_START(POODLE, "SHARP Poodle")
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 0b11c1a..840c3a4 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -32,6 +32,7 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/reboot.h>
+#include <linux/memblock.h>
 
 #include <asm/setup.h>
 #include <asm/mach-types.h>
@@ -971,13 +972,10 @@
 	spitz_i2c_init();
 }
 
-static void __init spitz_fixup(struct tag *tags, char **cmdline,
-			       struct meminfo *mi)
+static void __init spitz_fixup(struct tag *tags, char **cmdline)
 {
 	sharpsl_save_param();
-	mi->nr_banks = 1;
-	mi->bank[0].start = 0xa0000000;
-	mi->bank[0].size = (64*1024*1024);
+	memblock_add(0xa0000000, SZ_64M);
 }
 
 #ifdef CONFIG_MACH_SPITZ
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index ef5557b..c158a6e 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -37,6 +37,7 @@
 #include <linux/i2c/pxa-i2c.h>
 #include <linux/usb/gpio_vbus.h>
 #include <linux/reboot.h>
+#include <linux/memblock.h>
 
 #include <asm/setup.h>
 #include <asm/mach-types.h>
@@ -960,13 +961,10 @@
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 }
 
-static void __init fixup_tosa(struct tag *tags, char **cmdline,
-			      struct meminfo *mi)
+static void __init fixup_tosa(struct tag *tags, char **cmdline)
 {
 	sharpsl_save_param();
-	mi->nr_banks=1;
-	mi->bank[0].start = 0xa0000000;
-	mi->bank[0].size = (64*1024*1024);
+	memblock_add(0xa0000000, SZ_64M);
 }
 
 MACHINE_START(TOSA, "SHARP Tosa")
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 1d5ee5c..c2fae3a 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -31,6 +31,7 @@
 #include <linux/amba/mmci.h>
 #include <linux/gfp.h>
 #include <linux/mtd/physmap.h>
+#include <linux/memblock.h>
 
 #include <mach/hardware.h>
 #include <asm/irq.h>
@@ -370,19 +371,15 @@
 /*
  * Setup the memory banks.
  */
-void realview_fixup(struct tag *tags, char **from, struct meminfo *meminfo)
+void realview_fixup(struct tag *tags, char **from)
 {
 	/*
 	 * Most RealView platforms have 512MB contiguous RAM at 0x70000000.
 	 * Half of this is mirrored at 0.
 	 */
 #ifdef CONFIG_REALVIEW_HIGH_PHYS_OFFSET
-	meminfo->bank[0].start = 0x70000000;
-	meminfo->bank[0].size = SZ_512M;
-	meminfo->nr_banks = 1;
+	memblock_add(0x70000000, SZ_512M);
 #else
-	meminfo->bank[0].start = 0;
-	meminfo->bank[0].size = SZ_256M;
-	meminfo->nr_banks = 1;
+	memblock_add(0, SZ_256M);
 #endif
 }
diff --git a/arch/arm/mach-realview/core.h b/arch/arm/mach-realview/core.h
index 602ca5e..844946d 100644
--- a/arch/arm/mach-realview/core.h
+++ b/arch/arm/mach-realview/core.h
@@ -51,8 +51,7 @@
 extern int realview_eth_register(const char *name, struct resource *res);
 extern int realview_usb_register(struct resource *res);
 extern void realview_init_early(void);
-extern void realview_fixup(struct tag *tags, char **from,
-			   struct meminfo *meminfo);
+extern void realview_fixup(struct tag *tags, char **from);
 
 extern struct smp_operations realview_smp_ops;
 extern void realview_cpu_die(unsigned int cpu);
diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c
index c85ddb2..b575895 100644
--- a/arch/arm/mach-realview/realview_eb.c
+++ b/arch/arm/mach-realview/realview_eb.c
@@ -442,8 +442,13 @@
 		realview_eb11mp_fixup();
 
 #ifdef CONFIG_CACHE_L2X0
-		/* 1MB (128KB/way), 8-way associativity, evmon/parity/share enabled
-		 * Bits:  .... ...0 0111 1001 0000 .... .... .... */
+		/*
+		 * The PL220 needs to be manually configured as the hardware
+		 * doesn't report the correct sizes.
+		 * 1MB (128KB/way), 8-way associativity, event monitor and
+		 * parity enabled, ignore share bit, no force write allocate
+		 * Bits:  .... ...0 0111 1001 0000 .... .... ....
+		 */
 		l2x0_init(__io_address(REALVIEW_EB11MP_L220_BASE), 0x00790000, 0xfe000fff);
 #endif
 		platform_device_register(&pmu_device);
diff --git a/arch/arm/mach-realview/realview_pb1176.c b/arch/arm/mach-realview/realview_pb1176.c
index c5eade7..aad9c5a 100644
--- a/arch/arm/mach-realview/realview_pb1176.c
+++ b/arch/arm/mach-realview/realview_pb1176.c
@@ -32,6 +32,7 @@
 #include <linux/irqchip/arm-gic.h>
 #include <linux/platform_data/clk-realview.h>
 #include <linux/reboot.h>
+#include <linux/memblock.h>
 
 #include <mach/hardware.h>
 #include <asm/irq.h>
@@ -339,15 +340,12 @@
 	dsb();
 }
 
-static void realview_pb1176_fixup(struct tag *tags, char **from,
-				  struct meminfo *meminfo)
+static void realview_pb1176_fixup(struct tag *tags, char **from)
 {
 	/*
 	 * RealView PB1176 only has 128MB of RAM mapped at 0.
 	 */
-	meminfo->bank[0].start = 0;
-	meminfo->bank[0].size = SZ_128M;
-	meminfo->nr_banks = 1;
+	memblock_add(0, SZ_128M);
 }
 
 static void __init realview_pb1176_init(void)
@@ -355,7 +353,13 @@
 	int i;
 
 #ifdef CONFIG_CACHE_L2X0
-	/* 128Kb (16Kb/way) 8-way associativity. evmon/parity/share enabled. */
+	/*
+	 * The PL220 needs to be manually configured as the hardware
+	 * doesn't report the correct sizes.
+	 * 128kB (16kB/way), 8-way associativity, event monitor and
+	 * parity enabled, ignore share bit, no force write allocate
+	 * Bits:  .... ...0 0111 0011 0000 .... .... ....
+	 */
 	l2x0_init(__io_address(REALVIEW_PB1176_L220_BASE), 0x00730000, 0xfe000fff);
 #endif
 
diff --git a/arch/arm/mach-realview/realview_pb11mp.c b/arch/arm/mach-realview/realview_pb11mp.c
index f4b0962..101deaf 100644
--- a/arch/arm/mach-realview/realview_pb11mp.c
+++ b/arch/arm/mach-realview/realview_pb11mp.c
@@ -337,8 +337,13 @@
 	int i;
 
 #ifdef CONFIG_CACHE_L2X0
-	/* 1MB (128KB/way), 8-way associativity, evmon/parity/share enabled
-	 * Bits:  .... ...0 0111 1001 0000 .... .... .... */
+	/*
+	 * The PL220 needs to be manually configured as the hardware
+	 * doesn't report the correct sizes.
+	 * 1MB (128KB/way), 8-way associativity, event monitor and
+	 * parity enabled, ignore share bit, no force write allocate
+	 * Bits:  .... ...0 0111 1001 0000 .... .... ....
+	 */
 	l2x0_init(__io_address(REALVIEW_TC11MP_L220_BASE), 0x00790000, 0xfe000fff);
 #endif
 
diff --git a/arch/arm/mach-realview/realview_pbx.c b/arch/arm/mach-realview/realview_pbx.c
index 9d75493..535697a 100644
--- a/arch/arm/mach-realview/realview_pbx.c
+++ b/arch/arm/mach-realview/realview_pbx.c
@@ -29,6 +29,7 @@
 #include <linux/irqchip/arm-gic.h>
 #include <linux/platform_data/clk-realview.h>
 #include <linux/reboot.h>
+#include <linux/memblock.h>
 
 #include <asm/irq.h>
 #include <asm/mach-types.h>
@@ -325,23 +326,19 @@
 	realview_pbx_twd_init();
 }
 
-static void realview_pbx_fixup(struct tag *tags, char **from,
-			       struct meminfo *meminfo)
+static void realview_pbx_fixup(struct tag *tags, char **from)
 {
 #ifdef CONFIG_SPARSEMEM
 	/*
 	 * Memory configuration with SPARSEMEM enabled on RealView PBX (see
 	 * asm/mach/memory.h for more information).
 	 */
-	meminfo->bank[0].start = 0;
-	meminfo->bank[0].size = SZ_256M;
-	meminfo->bank[1].start = 0x20000000;
-	meminfo->bank[1].size = SZ_512M;
-	meminfo->bank[2].start = 0x80000000;
-	meminfo->bank[2].size = SZ_256M;
-	meminfo->nr_banks = 3;
+
+	memblock_add(0, SZ_256M);
+	memblock_add(0x20000000, SZ_512M);
+	memblock_add(0x80000000, SZ_256M);
 #else
-	realview_fixup(tags, from, meminfo);
+	realview_fixup(tags, from);
 #endif
 }
 
@@ -370,8 +367,8 @@
 			__io_address(REALVIEW_PBX_TILE_L220_BASE);
 
 		/* set RAM latencies to 1 cycle for eASIC */
-		writel(0, l2x0_base + L2X0_TAG_LATENCY_CTRL);
-		writel(0, l2x0_base + L2X0_DATA_LATENCY_CTRL);
+		writel(0, l2x0_base + L310_TAG_LATENCY_CTRL);
+		writel(0, l2x0_base + L310_DATA_LATENCY_CTRL);
 
 		/* 16KB way size, 8-way associativity, parity disabled
 		 * Bits:  .. 0 0 0 0 1 00 1 0 1 001 0 000 0 .... .... .... */
diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c
index d211d6f..138b997 100644
--- a/arch/arm/mach-rockchip/rockchip.c
+++ b/arch/arm/mach-rockchip/rockchip.c
@@ -24,12 +24,6 @@
 #include <asm/hardware/cache-l2x0.h>
 #include "core.h"
 
-static void __init rockchip_dt_init(void)
-{
-	l2x0_of_init(0, ~0UL);
-	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
-
 static const char * const rockchip_board_dt_compat[] = {
 	"rockchip,rk2928",
 	"rockchip,rk3066a",
@@ -39,7 +33,8 @@
 };
 
 DT_MACHINE_START(ROCKCHIP_DT, "Rockchip Cortex-A9 (Device Tree)")
+	.l2c_aux_val	= 0,
+	.l2c_aux_mask	= ~0,
 	.smp		= smp_ops(rockchip_smp_ops),
-	.init_machine	= rockchip_dt_init,
 	.dt_compat	= rockchip_board_dt_compat,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-smdk2413.c b/arch/arm/mach-s3c24xx/mach-smdk2413.c
index 233fe52..a03c855 100644
--- a/arch/arm/mach-s3c24xx/mach-smdk2413.c
+++ b/arch/arm/mach-s3c24xx/mach-smdk2413.c
@@ -22,6 +22,7 @@
 #include <linux/serial_s3c.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
+#include <linux/memblock.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
@@ -93,13 +94,10 @@
 	&s3c2412_device_dma,
 };
 
-static void __init smdk2413_fixup(struct tag *tags, char **cmdline,
-				  struct meminfo *mi)
+static void __init smdk2413_fixup(struct tag *tags, char **cmdline)
 {
 	if (tags != phys_to_virt(S3C2410_SDRAM_PA + 0x100)) {
-		mi->nr_banks=1;
-		mi->bank[0].start = 0x30000000;
-		mi->bank[0].size = SZ_64M;
+		memblock_add(0x30000000, SZ_64M);
 	}
 }
 
diff --git a/arch/arm/mach-s3c24xx/mach-vstms.c b/arch/arm/mach-s3c24xx/mach-vstms.c
index 40868c0..a79af78 100644
--- a/arch/arm/mach-s3c24xx/mach-vstms.c
+++ b/arch/arm/mach-s3c24xx/mach-vstms.c
@@ -23,6 +23,7 @@
 #include <linux/mtd/nand.h>
 #include <linux/mtd/nand_ecc.h>
 #include <linux/mtd/partitions.h>
+#include <linux/memblock.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
@@ -129,13 +130,10 @@
 	&s3c2412_device_dma,
 };
 
-static void __init vstms_fixup(struct tag *tags, char **cmdline,
-			       struct meminfo *mi)
+static void __init vstms_fixup(struct tag *tags, char **cmdline)
 {
 	if (tags != phys_to_virt(S3C2410_SDRAM_PA + 0x100)) {
-		mi->nr_banks=1;
-		mi->bank[0].start = 0x30000000;
-		mi->bank[0].size = SZ_64M;
+		memblock_add(0x30000000, SZ_64M);
 	}
 }
 
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index 8443a27..7dd894e 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -531,7 +531,7 @@
 }
 
 static void __init
-fixup_assabet(struct tag *tags, char **cmdline, struct meminfo *mi)
+fixup_assabet(struct tag *tags, char **cmdline)
 {
 	/* This must be done before any call to machine_has_neponset() */
 	map_sa1100_gpio_regs();
diff --git a/arch/arm/mach-shmobile/board-armadillo800eva-reference.c b/arch/arm/mach-shmobile/board-armadillo800eva-reference.c
index 57d1a78..39e11f4 100644
--- a/arch/arm/mach-shmobile/board-armadillo800eva-reference.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva-reference.c
@@ -164,8 +164,8 @@
 	r8a7740_meram_workaround();
 
 #ifdef CONFIG_CACHE_L2X0
-	/* Early BRESP enable, Shared attribute override enable, 32K*8way */
-	l2x0_init(IOMEM(0xf0002000), 0x40440000, 0x82000fff);
+	/* Shared attribute override enable, 32K*8way */
+	l2x0_init(IOMEM(0xf0002000), 0x00400000, 0xc20f0fff);
 #endif
 
 	r8a7740_add_standard_devices_dt();
diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c
index 486063d..f8e25fd 100644
--- a/arch/arm/mach-shmobile/board-armadillo800eva.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva.c
@@ -1271,8 +1271,8 @@
 
 
 #ifdef CONFIG_CACHE_L2X0
-	/* Early BRESP enable, Shared attribute override enable, 32K*8way */
-	l2x0_init(IOMEM(0xf0002000), 0x40440000, 0x82000fff);
+	/* Shared attribute override enable, 32K*8way */
+	l2x0_init(IOMEM(0xf0002000), 0x00400000, 0xc20f0fff);
 #endif
 
 	i2c_register_board_info(0, i2c0_devices, ARRAY_SIZE(i2c0_devices));
diff --git a/arch/arm/mach-shmobile/board-kzm9g-reference.c b/arch/arm/mach-shmobile/board-kzm9g-reference.c
index 598e324..a735a1d 100644
--- a/arch/arm/mach-shmobile/board-kzm9g-reference.c
+++ b/arch/arm/mach-shmobile/board-kzm9g-reference.c
@@ -36,8 +36,8 @@
 	sh73a0_add_standard_devices_dt();
 
 #ifdef CONFIG_CACHE_L2X0
-	/* Early BRESP enable, Shared attribute override enable, 64K*8way */
-	l2x0_init(IOMEM(0xf0100000), 0x40460000, 0x82000fff);
+	/* Shared attribute override enable, 64K*8way */
+	l2x0_init(IOMEM(0xf0100000), 0x00400000, 0xc20f0fff);
 #endif
 }
 
diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c
index 03dc3ac..f94ec8c 100644
--- a/arch/arm/mach-shmobile/board-kzm9g.c
+++ b/arch/arm/mach-shmobile/board-kzm9g.c
@@ -876,8 +876,8 @@
 	gpio_request_one(223, GPIOF_IN, NULL); /* IRQ8 */
 
 #ifdef CONFIG_CACHE_L2X0
-	/* Early BRESP enable, Shared attribute override enable, 64K*8way */
-	l2x0_init(IOMEM(0xf0100000), 0x40460000, 0x82000fff);
+	/* Shared attribute override enable, 64K*8way */
+	l2x0_init(IOMEM(0xf0100000), 0x00400000, 0xc20f0fff);
 #endif
 
 	i2c_register_board_info(0, i2c0_devices, ARRAY_SIZE(i2c0_devices));
diff --git a/arch/arm/mach-shmobile/setup-r8a7778.c b/arch/arm/mach-shmobile/setup-r8a7778.c
index 6d69452..6dd7ddf 100644
--- a/arch/arm/mach-shmobile/setup-r8a7778.c
+++ b/arch/arm/mach-shmobile/setup-r8a7778.c
@@ -298,10 +298,10 @@
 	void __iomem *base = ioremap_nocache(0xf0100000, 0x1000);
 	if (base) {
 		/*
-		 * Early BRESP enable, Shared attribute override enable, 64K*16way
+		 * Shared attribute override enable, 64K*16way
 		 * don't call iounmap(base)
 		 */
-		l2x0_init(base, 0x40470000, 0x82000fff);
+		l2x0_init(base, 0x00400000, 0xc20f0fff);
 	}
 #endif
 
diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c
index 8e860b3..a6630fc 100644
--- a/arch/arm/mach-shmobile/setup-r8a7779.c
+++ b/arch/arm/mach-shmobile/setup-r8a7779.c
@@ -700,8 +700,8 @@
 void __init r8a7779_add_standard_devices(void)
 {
 #ifdef CONFIG_CACHE_L2X0
-	/* Early BRESP enable, Shared attribute override enable, 64K*16way */
-	l2x0_init(IOMEM(0xf0100000), 0x40470000, 0x82000fff);
+	/* Shared attribute override enable, 64K*16way */
+	l2x0_init(IOMEM(0xf0100000), 0x00400000, 0xc20f0fff);
 #endif
 	r8a7779_pm_init();
 
diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c
index d86231e..adbf383 100644
--- a/arch/arm/mach-socfpga/socfpga.c
+++ b/arch/arm/mach-socfpga/socfpga.c
@@ -98,22 +98,17 @@
 	writel(temp, rst_manager_base_addr + SOCFPGA_RSTMGR_CTRL);
 }
 
-static void __init socfpga_cyclone5_init(void)
-{
-	l2x0_of_init(0, ~0UL);
-	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
-
 static const char *altera_dt_match[] = {
 	"altr,socfpga",
 	NULL
 };
 
 DT_MACHINE_START(SOCFPGA, "Altera SOCFPGA")
+	.l2c_aux_val	= 0,
+	.l2c_aux_mask	= ~0,
 	.smp		= smp_ops(socfpga_smp_ops),
 	.map_io		= socfpga_map_io,
 	.init_irq	= socfpga_init_irq,
-	.init_machine	= socfpga_cyclone5_init,
 	.restart	= socfpga_cyclone5_restart,
 	.dt_compat	= altera_dt_match,
 MACHINE_END
diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c
index c19751f..fd42977 100644
--- a/arch/arm/mach-spear/platsmp.c
+++ b/arch/arm/mach-spear/platsmp.c
@@ -20,6 +20,18 @@
 #include <mach/spear.h>
 #include "generic.h"
 
+/*
+ * Write pen_release in a way that is guaranteed to be visible to all
+ * observers, irrespective of whether they're taking part in coherency
+ * or not.  This is necessary for the hotplug code to work reliably.
+ */
+static void write_pen_release(int val)
+{
+	pen_release = val;
+	smp_wmb();
+	sync_cache_w(&pen_release);
+}
+
 static DEFINE_SPINLOCK(boot_lock);
 
 static void __iomem *scu_base = IOMEM(VA_SCU_BASE);
@@ -30,8 +42,7 @@
 	 * let the primary processor know we're out of the
 	 * pen, then head off into the C entry point
 	 */
-	pen_release = -1;
-	smp_wmb();
+	write_pen_release(-1);
 
 	/*
 	 * Synchronise with the boot thread.
@@ -58,9 +69,7 @@
 	 * Note that "pen_release" is the hardware CPU ID, whereas
 	 * "cpu" is Linux's internal ID.
 	 */
-	pen_release = cpu;
-	flush_cache_all();
-	outer_flush_all();
+	write_pen_release(cpu);
 
 	timeout = jiffies + (1 * HZ);
 	while (time_before(jiffies, timeout)) {
diff --git a/arch/arm/mach-spear/spear13xx.c b/arch/arm/mach-spear/spear13xx.c
index 7aa6e8c..c9897ea 100644
--- a/arch/arm/mach-spear/spear13xx.c
+++ b/arch/arm/mach-spear/spear13xx.c
@@ -38,15 +38,15 @@
 	if (!IS_ENABLED(CONFIG_CACHE_L2X0))
 		return;
 
-	writel_relaxed(0x06, VA_L2CC_BASE + L2X0_PREFETCH_CTRL);
+	writel_relaxed(0x06, VA_L2CC_BASE + L310_PREFETCH_CTRL);
 
 	/*
 	 * Program following latencies in order to make
 	 * SPEAr1340 work at 600 MHz
 	 */
-	writel_relaxed(0x221, VA_L2CC_BASE + L2X0_TAG_LATENCY_CTRL);
-	writel_relaxed(0x441, VA_L2CC_BASE + L2X0_DATA_LATENCY_CTRL);
-	l2x0_init(VA_L2CC_BASE, 0x70A60001, 0xfe00ffff);
+	writel_relaxed(0x221, VA_L2CC_BASE + L310_TAG_LATENCY_CTRL);
+	writel_relaxed(0x441, VA_L2CC_BASE + L310_DATA_LATENCY_CTRL);
+	l2x0_init(VA_L2CC_BASE, 0x30a00001, 0xfe0fffff);
 }
 
 /*
diff --git a/arch/arm/mach-sti/board-dt.c b/arch/arm/mach-sti/board-dt.c
index 1217fb5..910e978 100644
--- a/arch/arm/mach-sti/board-dt.c
+++ b/arch/arm/mach-sti/board-dt.c
@@ -14,25 +14,6 @@
 
 #include "smp.h"
 
-void __init stih41x_l2x0_init(void)
-{
-	u32 way_size = 0x4;
-	u32 aux_ctrl;
-	/* may be this can be encoded in macros like BIT*() */
-	aux_ctrl = (0x1 << L2X0_AUX_CTRL_SHARE_OVERRIDE_SHIFT) |
-		(0x1 << L2X0_AUX_CTRL_DATA_PREFETCH_SHIFT) |
-		(0x1 << L2X0_AUX_CTRL_INSTR_PREFETCH_SHIFT) |
-		(way_size << L2X0_AUX_CTRL_WAY_SIZE_SHIFT);
-
-	l2x0_of_init(aux_ctrl, L2X0_AUX_CTRL_MASK);
-}
-
-static void __init stih41x_machine_init(void)
-{
-	stih41x_l2x0_init();
-	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
-
 static const char *stih41x_dt_match[] __initdata = {
 	"st,stih415",
 	"st,stih416",
@@ -40,7 +21,11 @@
 };
 
 DT_MACHINE_START(STM, "STiH415/416 SoC with Flattened Device Tree")
-	.init_machine	= stih41x_machine_init,
-	.smp		= smp_ops(sti_smp_ops),
 	.dt_compat	= stih41x_dt_match,
+	.l2c_aux_val	= L2C_AUX_CTRL_SHARED_OVERRIDE |
+			  L310_AUX_CTRL_DATA_PREFETCH |
+			  L310_AUX_CTRL_INSTR_PREFETCH |
+			  L2C_AUX_CTRL_WAY_SIZE(4),
+	.l2c_aux_mask	= 0xc0000fff,
+	.smp		= smp_ops(sti_smp_ops),
 MACHINE_END
diff --git a/arch/arm/mach-tegra/pm.h b/arch/arm/mach-tegra/pm.h
index 6e92a7c..f4a8969 100644
--- a/arch/arm/mach-tegra/pm.h
+++ b/arch/arm/mach-tegra/pm.h
@@ -35,8 +35,6 @@
 void tegra30_lp1_iram_hook(void);
 void tegra30_sleep_core_init(void);
 
-extern unsigned long l2x0_saved_regs_addr;
-
 void tegra_clear_cpu_in_lp2(void);
 bool tegra_set_cpu_in_lp2(void);
 
diff --git a/arch/arm/mach-tegra/reset-handler.S b/arch/arm/mach-tegra/reset-handler.S
index 8c1ba4f..578d4d1a 100644
--- a/arch/arm/mach-tegra/reset-handler.S
+++ b/arch/arm/mach-tegra/reset-handler.S
@@ -19,7 +19,6 @@
 
 #include <asm/cache.h>
 #include <asm/asm-offsets.h>
-#include <asm/hardware/cache-l2x0.h>
 
 #include "flowctrl.h"
 #include "fuse.h"
@@ -78,8 +77,10 @@
 	str	r1, [r0]
 #endif
 
+#ifdef CONFIG_CACHE_L2X0
 	/* L2 cache resume & re-enable */
-	l2_cache_resume r0, r1, r2, l2x0_saved_regs_addr
+	bl	l2c310_early_resume
+#endif
 end_ca9_scu_l2_resume:
 	mov32	r9, 0xc0f
 	cmp	r8, r9
@@ -89,12 +90,6 @@
 ENDPROC(tegra_resume)
 #endif
 
-#ifdef CONFIG_CACHE_L2X0
-	.globl	l2x0_saved_regs_addr
-l2x0_saved_regs_addr:
-	.long	0
-#endif
-
 	.align L1_CACHE_SHIFT
 ENTRY(__tegra_cpu_reset_handler_start)
 
diff --git a/arch/arm/mach-tegra/sleep.h b/arch/arm/mach-tegra/sleep.h
index a4edbb3..339fe42 100644
--- a/arch/arm/mach-tegra/sleep.h
+++ b/arch/arm/mach-tegra/sleep.h
@@ -120,37 +120,6 @@
 	mov	\tmp1, \tmp1, lsr #8
 .endm
 
-/* Macro to resume & re-enable L2 cache */
-#ifndef L2X0_CTRL_EN
-#define L2X0_CTRL_EN	1
-#endif
-
-#ifdef CONFIG_CACHE_L2X0
-.macro l2_cache_resume, tmp1, tmp2, tmp3, phys_l2x0_saved_regs
-	W(adr)	\tmp1, \phys_l2x0_saved_regs
-	ldr	\tmp1, [\tmp1]
-	ldr	\tmp2, [\tmp1, #L2X0_R_PHY_BASE]
-	ldr	\tmp3, [\tmp2, #L2X0_CTRL]
-	tst	\tmp3, #L2X0_CTRL_EN
-	bne	exit_l2_resume
-	ldr	\tmp3, [\tmp1, #L2X0_R_TAG_LATENCY]
-	str	\tmp3, [\tmp2, #L2X0_TAG_LATENCY_CTRL]
-	ldr	\tmp3, [\tmp1, #L2X0_R_DATA_LATENCY]
-	str	\tmp3, [\tmp2, #L2X0_DATA_LATENCY_CTRL]
-	ldr	\tmp3, [\tmp1, #L2X0_R_PREFETCH_CTRL]
-	str	\tmp3, [\tmp2, #L2X0_PREFETCH_CTRL]
-	ldr	\tmp3, [\tmp1, #L2X0_R_PWR_CTRL]
-	str	\tmp3, [\tmp2, #L2X0_POWER_CTRL]
-	ldr	\tmp3, [\tmp1, #L2X0_R_AUX_CTRL]
-	str	\tmp3, [\tmp2, #L2X0_AUX_CTRL]
-	mov	\tmp3, #L2X0_CTRL_EN
-	str	\tmp3, [\tmp2, #L2X0_CTRL]
-exit_l2_resume:
-.endm
-#else /* CONFIG_CACHE_L2X0 */
-.macro l2_cache_resume, tmp1, tmp2, tmp3, phys_l2x0_saved_regs
-.endm
-#endif /* CONFIG_CACHE_L2X0 */
 #else
 void tegra_pen_lock(void);
 void tegra_pen_unlock(void);
diff --git a/arch/arm/mach-tegra/tegra.c b/arch/arm/mach-tegra/tegra.c
index 6191603..15ac9fc 100644
--- a/arch/arm/mach-tegra/tegra.c
+++ b/arch/arm/mach-tegra/tegra.c
@@ -70,40 +70,12 @@
 	0,
 };
 
-static void __init tegra_init_cache(void)
-{
-#ifdef CONFIG_CACHE_L2X0
-	static const struct of_device_id pl310_ids[] __initconst = {
-		{ .compatible = "arm,pl310-cache",  },
-		{}
-	};
-
-	struct device_node *np;
-	int ret;
-	void __iomem *p = IO_ADDRESS(TEGRA_ARM_PERIF_BASE) + 0x3000;
-	u32 aux_ctrl, cache_type;
-
-	np = of_find_matching_node(NULL, pl310_ids);
-	if (!np)
-		return;
-
-	cache_type = readl(p + L2X0_CACHE_TYPE);
-	aux_ctrl = (cache_type & 0x700) << (17-8);
-	aux_ctrl |= 0x7C400001;
-
-	ret = l2x0_of_init(aux_ctrl, 0x8200c3fe);
-	if (!ret)
-		l2x0_saved_regs_addr = virt_to_phys(&l2x0_saved_regs);
-#endif
-}
-
 static void __init tegra_init_early(void)
 {
 	of_register_trusted_foundations();
 	tegra_apb_io_init();
 	tegra_init_fuse();
 	tegra_cpu_reset_handler_init();
-	tegra_init_cache();
 	tegra_powergate_init();
 	tegra_hotplug_init();
 }
@@ -191,8 +163,10 @@
 };
 
 DT_MACHINE_START(TEGRA_DT, "NVIDIA Tegra SoC (Flattened Device Tree)")
-	.map_io		= tegra_map_common_io,
+	.l2c_aux_val	= 0x3c400001,
+	.l2c_aux_mask	= 0xc20fc3fe,
 	.smp		= smp_ops(tegra_smp_ops),
+	.map_io		= tegra_map_common_io,
 	.init_early	= tegra_init_early,
 	.init_irq	= tegra_dt_init_irq,
 	.init_machine	= tegra_dt_init,
diff --git a/arch/arm/mach-ux500/cache-l2x0.c b/arch/arm/mach-ux500/cache-l2x0.c
index 264f894..842ebed 100644
--- a/arch/arm/mach-ux500/cache-l2x0.c
+++ b/arch/arm/mach-ux500/cache-l2x0.c
@@ -35,10 +35,16 @@
 	return 0;
 }
 
+static void ux500_l2c310_write_sec(unsigned long val, unsigned reg)
+{
+	/*
+	 * We can't write to secure registers as we are in non-secure
+	 * mode, until we have some SMI service available.
+	 */
+}
+
 static int __init ux500_l2x0_init(void)
 {
-	u32 aux_val = 0x3e000000;
-
 	if (cpu_is_u8500_family() || cpu_is_ux540_family())
 		l2x0_base = __io_address(U8500_L2CC_BASE);
 	else
@@ -48,28 +54,12 @@
 	/* Unlock before init */
 	ux500_l2x0_unlock();
 
-	/* DBx540's L2 has 128KB way size */
-	if (cpu_is_ux540_family())
-		/* 128KB way size */
-		aux_val |= (0x4 << L2X0_AUX_CTRL_WAY_SIZE_SHIFT);
-	else
-		/* 64KB way size */
-		aux_val |= (0x3 << L2X0_AUX_CTRL_WAY_SIZE_SHIFT);
+	outer_cache.write_sec = ux500_l2c310_write_sec;
 
-	/* 64KB way size, 8 way associativity, force WA */
 	if (of_have_populated_dt())
-		l2x0_of_init(aux_val, 0xc0000fff);
+		l2x0_of_init(0, ~0);
 	else
-		l2x0_init(l2x0_base, aux_val, 0xc0000fff);
-
-	/*
-	 * We can't disable l2 as we are in non secure mode, currently
-	 * this seems be called only during kexec path. So let's
-	 * override outer.disable with nasty assignment until we have
-	 * some SMI service available.
-	 */
-	outer_cache.disable = NULL;
-	outer_cache.set_debug = NULL;
+		l2x0_init(l2x0_base, 0, ~0);
 
 	return 0;
 }
diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c
index 6f34497..204038e 100644
--- a/arch/arm/mach-vexpress/ct-ca9x4.c
+++ b/arch/arm/mach-vexpress/ct-ca9x4.c
@@ -45,6 +45,23 @@
 	iotable_init(ct_ca9x4_io_desc, ARRAY_SIZE(ct_ca9x4_io_desc));
 }
 
+static void __init ca9x4_l2_init(void)
+{
+#ifdef CONFIG_CACHE_L2X0
+	void __iomem *l2x0_base = ioremap(CT_CA9X4_L2CC, SZ_4K);
+
+	if (l2x0_base) {
+		/* set RAM latencies to 1 cycle for this core tile. */
+		writel(0, l2x0_base + L310_TAG_LATENCY_CTRL);
+		writel(0, l2x0_base + L310_DATA_LATENCY_CTRL);
+
+		l2x0_init(l2x0_base, 0x00400000, 0xfe0fffff);
+	} else {
+		pr_err("L2C: unable to map L2 cache controller\n");
+	}
+#endif
+}
+
 #ifdef CONFIG_HAVE_ARM_TWD
 static DEFINE_TWD_LOCAL_TIMER(twd_local_timer, A9_MPCORE_TWD, IRQ_LOCALTIMER);
 
@@ -63,6 +80,7 @@
 	gic_init(0, 29, ioremap(A9_MPCORE_GIC_DIST, SZ_4K),
 		 ioremap(A9_MPCORE_GIC_CPU, SZ_256));
 	ca9x4_twd_init();
+	ca9x4_l2_init();
 }
 
 static int ct_ca9x4_clcd_setup(struct clcd_fb *fb)
@@ -141,16 +159,6 @@
 {
 	int i;
 
-#ifdef CONFIG_CACHE_L2X0
-	void __iomem *l2x0_base = ioremap(CT_CA9X4_L2CC, SZ_4K);
-
-	/* set RAM latencies to 1 cycle for this core tile. */
-	writel(0, l2x0_base + L2X0_TAG_LATENCY_CTRL);
-	writel(0, l2x0_base + L2X0_DATA_LATENCY_CTRL);
-
-	l2x0_init(l2x0_base, 0x00400000, 0xfe0fffff);
-#endif
-
 	for (i = 0; i < ARRAY_SIZE(ct_ca9x4_amba_devs); i++)
 		amba_device_register(ct_ca9x4_amba_devs[i], &iomem_resource);
 
diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c
index 29e7785..b743a0a 100644
--- a/arch/arm/mach-vexpress/tc2_pm.c
+++ b/arch/arm/mach-vexpress/tc2_pm.c
@@ -209,7 +209,7 @@
 #define POLL_MSEC 10
 #define TIMEOUT_MSEC 1000
 
-static int tc2_pm_power_down_finish(unsigned int cpu, unsigned int cluster)
+static int tc2_pm_wait_for_powerdown(unsigned int cpu, unsigned int cluster)
 {
 	unsigned tries;
 
@@ -290,7 +290,7 @@
 static const struct mcpm_platform_ops tc2_pm_power_ops = {
 	.power_up		= tc2_pm_power_up,
 	.power_down		= tc2_pm_power_down,
-	.power_down_finish	= tc2_pm_power_down_finish,
+	.wait_for_powerdown	= tc2_pm_wait_for_powerdown,
 	.suspend		= tc2_pm_suspend,
 	.powered_up		= tc2_pm_powered_up,
 };
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index 4f8b8cb..b2fea70 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -432,7 +432,6 @@
 
 static void __init v2m_dt_init(void)
 {
-	l2x0_of_init(0x00400000, 0xfe0fffff);
 	of_platform_populate(NULL, v2m_dt_bus_match, NULL, NULL);
 }
 
@@ -443,6 +442,8 @@
 
 DT_MACHINE_START(VEXPRESS_DT, "ARM-Versatile Express")
 	.dt_compat	= v2m_dt_match,
+	.l2c_aux_val	= 0x00400000,
+	.l2c_aux_mask	= 0xfe0fffff,
 	.smp		= smp_ops(vexpress_smp_ops),
 	.smp_init	= smp_init_ops(vexpress_smp_init_ops),
 	.map_io		= v2m_dt_map_io,
diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
index 6fcc584..d1e992e 100644
--- a/arch/arm/mach-zynq/common.c
+++ b/arch/arm/mach-zynq/common.c
@@ -67,11 +67,6 @@
 {
 	struct platform_device_info devinfo = { .name = "cpufreq-cpu0", };
 
-	/*
-	 * 64KB way size, 8-way associativity, parity disabled
-	 */
-	l2x0_of_init(0x02060000, 0xF0F0FFFF);
-
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 
 	platform_device_register(&zynq_cpuidle_device);
@@ -133,6 +128,9 @@
 };
 
 DT_MACHINE_START(XILINX_EP107, "Xilinx Zynq Platform")
+	/* 64KB way size, 8-way associativity, parity disabled */
+	.l2c_aux_val	= 0x02000000,
+	.l2c_aux_mask	= 0xf0ffffff,
 	.smp		= smp_ops(zynq_smp_ops),
 	.map_io		= zynq_map_io,
 	.init_irq	= zynq_irq_init,
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 5bf7c3c..eda0dd0 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -897,6 +897,57 @@
 	  This option enables optimisations for the PL310 cache
 	  controller.
 
+config PL310_ERRATA_588369
+	bool "PL310 errata: Clean & Invalidate maintenance operations do not invalidate clean lines"
+	depends on CACHE_L2X0
+	help
+	   The PL310 L2 cache controller implements three types of Clean &
+	   Invalidate maintenance operations: by Physical Address
+	   (offset 0x7F0), by Index/Way (0x7F8) and by Way (0x7FC).
+	   They are architecturally defined to behave as the execution of a
+	   clean operation followed immediately by an invalidate operation,
+	   both performing to the same memory location. This functionality
+	   is not correctly implemented in PL310 as clean lines are not
+	   invalidated as a result of these operations.
+
+config PL310_ERRATA_727915
+	bool "PL310 errata: Background Clean & Invalidate by Way operation can cause data corruption"
+	depends on CACHE_L2X0
+	help
+	  PL310 implements the Clean & Invalidate by Way L2 cache maintenance
+	  operation (offset 0x7FC). This operation runs in background so that
+	  PL310 can handle normal accesses while it is in progress. Under very
+	  rare circumstances, due to this erratum, write data can be lost when
+	  PL310 treats a cacheable write transaction during a Clean &
+	  Invalidate by Way operation.
+
+config PL310_ERRATA_753970
+	bool "PL310 errata: cache sync operation may be faulty"
+	depends on CACHE_PL310
+	help
+	  This option enables the workaround for the 753970 PL310 (r3p0) erratum.
+
+	  Under some condition the effect of cache sync operation on
+	  the store buffer still remains when the operation completes.
+	  This means that the store buffer is always asked to drain and
+	  this prevents it from merging any further writes. The workaround
+	  is to replace the normal offset of cache sync operation (0x730)
+	  by another offset targeting an unmapped PL310 register 0x740.
+	  This has the same effect as the cache sync operation: store buffer
+	  drain and waiting for all buffers empty.
+
+config PL310_ERRATA_769419
+	bool "PL310 errata: no automatic Store Buffer drain"
+	depends on CACHE_L2X0
+	help
+	  On revisions of the PL310 prior to r3p2, the Store Buffer does
+	  not automatically drain. This can cause normal, non-cacheable
+	  writes to be retained when the memory system is idle, leading
+	  to suboptimal I/O performance for drivers using coherent DMA.
+	  This option adds a write barrier to the cpu_idle loop so that,
+	  on systems with an outer cache, the store buffer is drained
+	  explicitly.
+
 config CACHE_TAUROS2
 	bool "Enable the Tauros2 L2 cache controller"
 	depends on (ARCH_DOVE || ARCH_MMP || CPU_PJ4)
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 7f39ce2..91da64d 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -95,7 +95,8 @@
 AFLAGS_proc-v6.o	:=-Wa,-march=armv6
 AFLAGS_proc-v7.o	:=-Wa,-march=armv7-a
 
+obj-$(CONFIG_OUTER_CACHE)	+= l2c-common.o
 obj-$(CONFIG_CACHE_FEROCEON_L2)	+= cache-feroceon-l2.o
-obj-$(CONFIG_CACHE_L2X0)	+= cache-l2x0.o
+obj-$(CONFIG_CACHE_L2X0)	+= cache-l2x0.o l2c-l2x0-resume.o
 obj-$(CONFIG_CACHE_XSC3L2)	+= cache-xsc3l2.o
 obj-$(CONFIG_CACHE_TAUROS2)	+= cache-tauros2.o
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 9240364..b8cb1a2 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -28,6 +28,7 @@
 #include <asm/opcodes.h>
 
 #include "fault.h"
+#include "mm.h"
 
 /*
  * 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) -July 1998
@@ -81,6 +82,7 @@
 static unsigned long ai_dword;
 static unsigned long ai_multi;
 static int ai_usermode;
+static unsigned long cr_no_alignment;
 
 core_param(alignment, ai_usermode, int, 0600);
 
@@ -91,7 +93,7 @@
 /* Return true if and only if the ARMv6 unaligned access model is in use. */
 static bool cpu_is_v6_unaligned(void)
 {
-	return cpu_architecture() >= CPU_ARCH_ARMv6 && (cr_alignment & CR_U);
+	return cpu_architecture() >= CPU_ARCH_ARMv6 && get_cr() & CR_U;
 }
 
 static int safe_usermode(int new_usermode, bool warn)
@@ -949,6 +951,13 @@
 	return 0;
 }
 
+static int __init noalign_setup(char *__unused)
+{
+	set_cr(__clear_cr(CR_A));
+	return 1;
+}
+__setup("noalign", noalign_setup);
+
 /*
  * This needs to be done after sysctl_init, otherwise sys/ will be
  * overwritten.  Actually, this shouldn't be in sys/ at all since
@@ -966,14 +975,12 @@
 		return -ENOMEM;
 #endif
 
-#ifdef CONFIG_CPU_CP15
 	if (cpu_is_v6_unaligned()) {
-		cr_alignment &= ~CR_A;
-		cr_no_alignment &= ~CR_A;
-		set_cr(cr_alignment);
+		set_cr(__clear_cr(CR_A));
 		ai_usermode = safe_usermode(ai_usermode, false);
 	}
-#endif
+
+	cr_no_alignment = get_cr() & ~CR_A;
 
 	hook_fault_code(FAULT_CODE_ALIGNMENT, do_alignment, SIGBUS, BUS_ADRALN,
 			"alignment exception");
diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
index dc814a5..e028a7f 100644
--- a/arch/arm/mm/cache-feroceon-l2.c
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -350,7 +350,6 @@
 	outer_cache.inv_range = feroceon_l2_inv_range;
 	outer_cache.clean_range = feroceon_l2_clean_range;
 	outer_cache.flush_range = feroceon_l2_flush_range;
-	outer_cache.inv_all = l2_inv_all;
 
 	enable_l2();
 
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 7abde2c..efc5cab 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -16,18 +16,33 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
+#include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/init.h>
+#include <linux/smp.h>
 #include <linux/spinlock.h>
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 
 #include <asm/cacheflush.h>
+#include <asm/cp15.h>
+#include <asm/cputype.h>
 #include <asm/hardware/cache-l2x0.h>
 #include "cache-tauros3.h"
 #include "cache-aurora-l2.h"
 
+struct l2c_init_data {
+	const char *type;
+	unsigned way_size_0;
+	unsigned num_lock;
+	void (*of_parse)(const struct device_node *, u32 *, u32 *);
+	void (*enable)(void __iomem *, u32, unsigned);
+	void (*fixup)(void __iomem *, u32, struct outer_cache_fns *);
+	void (*save)(void __iomem *);
+	struct outer_cache_fns outer_cache;
+};
+
 #define CACHE_LINE_SIZE		32
 
 static void __iomem *l2x0_base;
@@ -36,34 +51,97 @@
 static u32 l2x0_size;
 static unsigned long sync_reg_offset = L2X0_CACHE_SYNC;
 
-/* Aurora don't have the cache ID register available, so we have to
- * pass it though the device tree */
-static u32  cache_id_part_number_from_dt;
-
 struct l2x0_regs l2x0_saved_regs;
 
-struct l2x0_of_data {
-	void (*setup)(const struct device_node *, u32 *, u32 *);
-	void (*save)(void);
-	struct outer_cache_fns outer_cache;
-};
-
-static bool of_init = false;
-
-static inline void cache_wait_way(void __iomem *reg, unsigned long mask)
+/*
+ * Common code for all cache controllers.
+ */
+static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask)
 {
 	/* wait for cache operation by line or way to complete */
 	while (readl_relaxed(reg) & mask)
 		cpu_relax();
 }
 
+/*
+ * By default, we write directly to secure registers.  Platforms must
+ * override this if they are running non-secure.
+ */
+static void l2c_write_sec(unsigned long val, void __iomem *base, unsigned reg)
+{
+	if (val == readl_relaxed(base + reg))
+		return;
+	if (outer_cache.write_sec)
+		outer_cache.write_sec(val, reg);
+	else
+		writel_relaxed(val, base + reg);
+}
+
+/*
+ * This should only be called when we have a requirement that the
+ * register be written due to a work-around, as platforms running
+ * in non-secure mode may not be able to access this register.
+ */
+static inline void l2c_set_debug(void __iomem *base, unsigned long val)
+{
+	l2c_write_sec(val, base, L2X0_DEBUG_CTRL);
+}
+
+static void __l2c_op_way(void __iomem *reg)
+{
+	writel_relaxed(l2x0_way_mask, reg);
+	l2c_wait_mask(reg, l2x0_way_mask);
+}
+
+static inline void l2c_unlock(void __iomem *base, unsigned num)
+{
+	unsigned i;
+
+	for (i = 0; i < num; i++) {
+		writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE +
+			       i * L2X0_LOCKDOWN_STRIDE);
+		writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE +
+			       i * L2X0_LOCKDOWN_STRIDE);
+	}
+}
+
+/*
+ * Enable the L2 cache controller.  This function must only be
+ * called when the cache controller is known to be disabled.
+ */
+static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)
+{
+	unsigned long flags;
+
+	l2c_write_sec(aux, base, L2X0_AUX_CTRL);
+
+	l2c_unlock(base, num_lock);
+
+	local_irq_save(flags);
+	__l2c_op_way(base + L2X0_INV_WAY);
+	writel_relaxed(0, base + sync_reg_offset);
+	l2c_wait_mask(base + sync_reg_offset, 1);
+	local_irq_restore(flags);
+
+	l2c_write_sec(L2X0_CTRL_EN, base, L2X0_CTRL);
+}
+
+static void l2c_disable(void)
+{
+	void __iomem *base = l2x0_base;
+
+	outer_cache.flush_all();
+	l2c_write_sec(0, base, L2X0_CTRL);
+	dsb(st);
+}
+
 #ifdef CONFIG_CACHE_PL310
 static inline void cache_wait(void __iomem *reg, unsigned long mask)
 {
 	/* cache operations by line are atomic on PL310 */
 }
 #else
-#define cache_wait	cache_wait_way
+#define cache_wait	l2c_wait_mask
 #endif
 
 static inline void cache_sync(void)
@@ -74,59 +152,16 @@
 	cache_wait(base + L2X0_CACHE_SYNC, 1);
 }
 
-static inline void l2x0_clean_line(unsigned long addr)
-{
-	void __iomem *base = l2x0_base;
-	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
-	writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA);
-}
-
-static inline void l2x0_inv_line(unsigned long addr)
-{
-	void __iomem *base = l2x0_base;
-	cache_wait(base + L2X0_INV_LINE_PA, 1);
-	writel_relaxed(addr, base + L2X0_INV_LINE_PA);
-}
-
 #if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915)
 static inline void debug_writel(unsigned long val)
 {
-	if (outer_cache.set_debug)
-		outer_cache.set_debug(val);
-}
-
-static void pl310_set_debug(unsigned long val)
-{
-	writel_relaxed(val, l2x0_base + L2X0_DEBUG_CTRL);
+	l2c_set_debug(l2x0_base, val);
 }
 #else
 /* Optimised out for non-errata case */
 static inline void debug_writel(unsigned long val)
 {
 }
-
-#define pl310_set_debug	NULL
-#endif
-
-#ifdef CONFIG_PL310_ERRATA_588369
-static inline void l2x0_flush_line(unsigned long addr)
-{
-	void __iomem *base = l2x0_base;
-
-	/* Clean by PA followed by Invalidate by PA */
-	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
-	writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA);
-	cache_wait(base + L2X0_INV_LINE_PA, 1);
-	writel_relaxed(addr, base + L2X0_INV_LINE_PA);
-}
-#else
-
-static inline void l2x0_flush_line(unsigned long addr)
-{
-	void __iomem *base = l2x0_base;
-	cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1);
-	writel_relaxed(addr, base + L2X0_CLEAN_INV_LINE_PA);
-}
 #endif
 
 static void l2x0_cache_sync(void)
@@ -141,8 +176,7 @@
 static void __l2x0_flush_all(void)
 {
 	debug_writel(0x03);
-	writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_INV_WAY);
-	cache_wait_way(l2x0_base + L2X0_CLEAN_INV_WAY, l2x0_way_mask);
+	__l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY);
 	cache_sync();
 	debug_writel(0x00);
 }
@@ -157,275 +191,883 @@
 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
-static void l2x0_clean_all(void)
-{
-	unsigned long flags;
-
-	/* clean all ways */
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_WAY);
-	cache_wait_way(l2x0_base + L2X0_CLEAN_WAY, l2x0_way_mask);
-	cache_sync();
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
-static void l2x0_inv_all(void)
-{
-	unsigned long flags;
-
-	/* invalidate all ways */
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	/* Invalidating when L2 is enabled is a nono */
-	BUG_ON(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN);
-	writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY);
-	cache_wait_way(l2x0_base + L2X0_INV_WAY, l2x0_way_mask);
-	cache_sync();
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
-static void l2x0_inv_range(unsigned long start, unsigned long end)
-{
-	void __iomem *base = l2x0_base;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	if (start & (CACHE_LINE_SIZE - 1)) {
-		start &= ~(CACHE_LINE_SIZE - 1);
-		debug_writel(0x03);
-		l2x0_flush_line(start);
-		debug_writel(0x00);
-		start += CACHE_LINE_SIZE;
-	}
-
-	if (end & (CACHE_LINE_SIZE - 1)) {
-		end &= ~(CACHE_LINE_SIZE - 1);
-		debug_writel(0x03);
-		l2x0_flush_line(end);
-		debug_writel(0x00);
-	}
-
-	while (start < end) {
-		unsigned long blk_end = start + min(end - start, 4096UL);
-
-		while (start < blk_end) {
-			l2x0_inv_line(start);
-			start += CACHE_LINE_SIZE;
-		}
-
-		if (blk_end < end) {
-			raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-			raw_spin_lock_irqsave(&l2x0_lock, flags);
-		}
-	}
-	cache_wait(base + L2X0_INV_LINE_PA, 1);
-	cache_sync();
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
-static void l2x0_clean_range(unsigned long start, unsigned long end)
-{
-	void __iomem *base = l2x0_base;
-	unsigned long flags;
-
-	if ((end - start) >= l2x0_size) {
-		l2x0_clean_all();
-		return;
-	}
-
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	start &= ~(CACHE_LINE_SIZE - 1);
-	while (start < end) {
-		unsigned long blk_end = start + min(end - start, 4096UL);
-
-		while (start < blk_end) {
-			l2x0_clean_line(start);
-			start += CACHE_LINE_SIZE;
-		}
-
-		if (blk_end < end) {
-			raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-			raw_spin_lock_irqsave(&l2x0_lock, flags);
-		}
-	}
-	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
-	cache_sync();
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
-static void l2x0_flush_range(unsigned long start, unsigned long end)
-{
-	void __iomem *base = l2x0_base;
-	unsigned long flags;
-
-	if ((end - start) >= l2x0_size) {
-		l2x0_flush_all();
-		return;
-	}
-
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	start &= ~(CACHE_LINE_SIZE - 1);
-	while (start < end) {
-		unsigned long blk_end = start + min(end - start, 4096UL);
-
-		debug_writel(0x03);
-		while (start < blk_end) {
-			l2x0_flush_line(start);
-			start += CACHE_LINE_SIZE;
-		}
-		debug_writel(0x00);
-
-		if (blk_end < end) {
-			raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-			raw_spin_lock_irqsave(&l2x0_lock, flags);
-		}
-	}
-	cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1);
-	cache_sync();
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
 static void l2x0_disable(void)
 {
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&l2x0_lock, flags);
 	__l2x0_flush_all();
-	writel_relaxed(0, l2x0_base + L2X0_CTRL);
+	l2c_write_sec(0, l2x0_base, L2X0_CTRL);
 	dsb(st);
 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
-static void l2x0_unlock(u32 cache_id)
+static void l2c_save(void __iomem *base)
 {
-	int lockregs;
-	int i;
+	l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
+}
 
-	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
-	case L2X0_CACHE_ID_PART_L310:
-		lockregs = 8;
-		break;
-	case AURORA_CACHE_ID:
-		lockregs = 4;
-		break;
-	default:
-		/* L210 and unknown types */
-		lockregs = 1;
-		break;
-	}
+/*
+ * L2C-210 specific code.
+ *
+ * The L2C-2x0 PA, set/way and sync operations are atomic, but we must
+ * ensure that no background operation is running.  The way operations
+ * are all background tasks.
+ *
+ * While a background operation is in progress, any new operation is
+ * ignored (unspecified whether this causes an error.)  Thankfully, not
+ * used on SMP.
+ *
+ * Never has a different sync register other than L2X0_CACHE_SYNC, but
+ * we use sync_reg_offset here so we can share some of this with L2C-310.
+ */
+static void __l2c210_cache_sync(void __iomem *base)
+{
+	writel_relaxed(0, base + sync_reg_offset);
+}
 
-	for (i = 0; i < lockregs; i++) {
-		writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_D_BASE +
-			       i * L2X0_LOCKDOWN_STRIDE);
-		writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_I_BASE +
-			       i * L2X0_LOCKDOWN_STRIDE);
+static void __l2c210_op_pa_range(void __iomem *reg, unsigned long start,
+	unsigned long end)
+{
+	while (start < end) {
+		writel_relaxed(start, reg);
+		start += CACHE_LINE_SIZE;
 	}
 }
 
-void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
+static void l2c210_inv_range(unsigned long start, unsigned long end)
 {
-	u32 aux;
-	u32 cache_id;
-	u32 way_size = 0;
-	int ways;
-	int way_size_shift = L2X0_WAY_SIZE_SHIFT;
-	const char *type;
+	void __iomem *base = l2x0_base;
 
-	l2x0_base = base;
-	if (cache_id_part_number_from_dt)
-		cache_id = cache_id_part_number_from_dt;
-	else
-		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
-	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
+	if (start & (CACHE_LINE_SIZE - 1)) {
+		start &= ~(CACHE_LINE_SIZE - 1);
+		writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA);
+		start += CACHE_LINE_SIZE;
+	}
 
+	if (end & (CACHE_LINE_SIZE - 1)) {
+		end &= ~(CACHE_LINE_SIZE - 1);
+		writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA);
+	}
+
+	__l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end);
+	__l2c210_cache_sync(base);
+}
+
+static void l2c210_clean_range(unsigned long start, unsigned long end)
+{
+	void __iomem *base = l2x0_base;
+
+	start &= ~(CACHE_LINE_SIZE - 1);
+	__l2c210_op_pa_range(base + L2X0_CLEAN_LINE_PA, start, end);
+	__l2c210_cache_sync(base);
+}
+
+static void l2c210_flush_range(unsigned long start, unsigned long end)
+{
+	void __iomem *base = l2x0_base;
+
+	start &= ~(CACHE_LINE_SIZE - 1);
+	__l2c210_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, start, end);
+	__l2c210_cache_sync(base);
+}
+
+static void l2c210_flush_all(void)
+{
+	void __iomem *base = l2x0_base;
+
+	BUG_ON(!irqs_disabled());
+
+	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+	__l2c210_cache_sync(base);
+}
+
+static void l2c210_sync(void)
+{
+	__l2c210_cache_sync(l2x0_base);
+}
+
+static void l2c210_resume(void)
+{
+	void __iomem *base = l2x0_base;
+
+	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
+		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 1);
+}
+
+static const struct l2c_init_data l2c210_data __initconst = {
+	.type = "L2C-210",
+	.way_size_0 = SZ_8K,
+	.num_lock = 1,
+	.enable = l2c_enable,
+	.save = l2c_save,
+	.outer_cache = {
+		.inv_range = l2c210_inv_range,
+		.clean_range = l2c210_clean_range,
+		.flush_range = l2c210_flush_range,
+		.flush_all = l2c210_flush_all,
+		.disable = l2c_disable,
+		.sync = l2c210_sync,
+		.resume = l2c210_resume,
+	},
+};
+
+/*
+ * L2C-220 specific code.
+ *
+ * All operations are background operations: they have to be waited for.
+ * Conflicting requests generate a slave error (which will cause an
+ * imprecise abort.)  Never uses sync_reg_offset, so we hard-code the
+ * sync register here.
+ *
+ * However, we can re-use the l2c210_resume call.
+ */
+static inline void __l2c220_cache_sync(void __iomem *base)
+{
+	writel_relaxed(0, base + L2X0_CACHE_SYNC);
+	l2c_wait_mask(base + L2X0_CACHE_SYNC, 1);
+}
+
+static void l2c220_op_way(void __iomem *base, unsigned reg)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	__l2c_op_way(base + reg);
+	__l2c220_cache_sync(base);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start,
+	unsigned long end, unsigned long flags)
+{
+	raw_spinlock_t *lock = &l2x0_lock;
+
+	while (start < end) {
+		unsigned long blk_end = start + min(end - start, 4096UL);
+
+		while (start < blk_end) {
+			l2c_wait_mask(reg, 1);
+			writel_relaxed(start, reg);
+			start += CACHE_LINE_SIZE;
+		}
+
+		if (blk_end < end) {
+			raw_spin_unlock_irqrestore(lock, flags);
+			raw_spin_lock_irqsave(lock, flags);
+		}
+	}
+
+	return flags;
+}
+
+static void l2c220_inv_range(unsigned long start, unsigned long end)
+{
+	void __iomem *base = l2x0_base;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	if ((start | end) & (CACHE_LINE_SIZE - 1)) {
+		if (start & (CACHE_LINE_SIZE - 1)) {
+			start &= ~(CACHE_LINE_SIZE - 1);
+			writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA);
+			start += CACHE_LINE_SIZE;
+		}
+
+		if (end & (CACHE_LINE_SIZE - 1)) {
+			end &= ~(CACHE_LINE_SIZE - 1);
+			l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
+			writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA);
+		}
+	}
+
+	flags = l2c220_op_pa_range(base + L2X0_INV_LINE_PA,
+				   start, end, flags);
+	l2c_wait_mask(base + L2X0_INV_LINE_PA, 1);
+	__l2c220_cache_sync(base);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void l2c220_clean_range(unsigned long start, unsigned long end)
+{
+	void __iomem *base = l2x0_base;
+	unsigned long flags;
+
+	start &= ~(CACHE_LINE_SIZE - 1);
+	if ((end - start) >= l2x0_size) {
+		l2c220_op_way(base, L2X0_CLEAN_WAY);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	flags = l2c220_op_pa_range(base + L2X0_CLEAN_LINE_PA,
+				   start, end, flags);
+	l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
+	__l2c220_cache_sync(base);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void l2c220_flush_range(unsigned long start, unsigned long end)
+{
+	void __iomem *base = l2x0_base;
+	unsigned long flags;
+
+	start &= ~(CACHE_LINE_SIZE - 1);
+	if ((end - start) >= l2x0_size) {
+		l2c220_op_way(base, L2X0_CLEAN_INV_WAY);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	flags = l2c220_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA,
+				   start, end, flags);
+	l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
+	__l2c220_cache_sync(base);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void l2c220_flush_all(void)
+{
+	l2c220_op_way(l2x0_base, L2X0_CLEAN_INV_WAY);
+}
+
+static void l2c220_sync(void)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	__l2c220_cache_sync(l2x0_base);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void l2c220_enable(void __iomem *base, u32 aux, unsigned num_lock)
+{
+	/*
+	 * Always enable non-secure access to the lockdown registers -
+	 * we write to them as part of the L2C enable sequence so they
+	 * need to be accessible.
+	 */
+	aux |= L220_AUX_CTRL_NS_LOCKDOWN;
+
+	l2c_enable(base, aux, num_lock);
+}
+
+static const struct l2c_init_data l2c220_data = {
+	.type = "L2C-220",
+	.way_size_0 = SZ_8K,
+	.num_lock = 1,
+	.enable = l2c220_enable,
+	.save = l2c_save,
+	.outer_cache = {
+		.inv_range = l2c220_inv_range,
+		.clean_range = l2c220_clean_range,
+		.flush_range = l2c220_flush_range,
+		.flush_all = l2c220_flush_all,
+		.disable = l2c_disable,
+		.sync = l2c220_sync,
+		.resume = l2c210_resume,
+	},
+};
+
+/*
+ * L2C-310 specific code.
+ *
+ * Very similar to L2C-210, the PA, set/way and sync operations are atomic,
+ * and the way operations are all background tasks.  However, issuing an
+ * operation while a background operation is in progress results in a
+ * SLVERR response.  We can reuse:
+ *
+ *  __l2c210_cache_sync (using sync_reg_offset)
+ *  l2c210_sync
+ *  l2c210_inv_range (if 588369 is not applicable)
+ *  l2c210_clean_range
+ *  l2c210_flush_range (if 588369 is not applicable)
+ *  l2c210_flush_all (if 727915 is not applicable)
+ *
+ * Errata:
+ * 588369: PL310 R0P0->R1P0, fixed R2P0.
+ *	Affects: all clean+invalidate operations
+ *	clean and invalidate skips the invalidate step, so we need to issue
+ *	separate operations.  We also require the above debug workaround
+ *	enclosing this code fragment on affected parts.  On unaffected parts,
+ *	we must not use this workaround without the debug register writes
+ *	to avoid exposing a problem similar to 727915.
+ *
+ * 727915: PL310 R2P0->R3P0, fixed R3P1.
+ *	Affects: clean+invalidate by way
+ *	clean and invalidate by way runs in the background, and a store can
+ *	hit the line between the clean operation and invalidate operation,
+ *	resulting in the store being lost.
+ *
+ * 752271: PL310 R3P0->R3P1-50REL0, fixed R3P2.
+ *	Affects: 8x64-bit (double fill) line fetches
+ *	double fill line fetches can fail to cause dirty data to be evicted
+ *	from the cache before the new data overwrites the second line.
+ *
+ * 753970: PL310 R3P0, fixed R3P1.
+ *	Affects: sync
+ *	prevents merging writes after the sync operation, until another L2C
+ *	operation is performed (or a number of other conditions.)
+ *
+ * 769419: PL310 R0P0->R3P1, fixed R3P2.
+ *	Affects: store buffer
+ *	store buffer is not automatically drained.
+ */
+static void l2c310_inv_range_erratum(unsigned long start, unsigned long end)
+{
+	void __iomem *base = l2x0_base;
+
+	if ((start | end) & (CACHE_LINE_SIZE - 1)) {
+		unsigned long flags;
+
+		/* Erratum 588369 for both clean+invalidate operations */
+		raw_spin_lock_irqsave(&l2x0_lock, flags);
+		l2c_set_debug(base, 0x03);
+
+		if (start & (CACHE_LINE_SIZE - 1)) {
+			start &= ~(CACHE_LINE_SIZE - 1);
+			writel_relaxed(start, base + L2X0_CLEAN_LINE_PA);
+			writel_relaxed(start, base + L2X0_INV_LINE_PA);
+			start += CACHE_LINE_SIZE;
+		}
+
+		if (end & (CACHE_LINE_SIZE - 1)) {
+			end &= ~(CACHE_LINE_SIZE - 1);
+			writel_relaxed(end, base + L2X0_CLEAN_LINE_PA);
+			writel_relaxed(end, base + L2X0_INV_LINE_PA);
+		}
+
+		l2c_set_debug(base, 0x00);
+		raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+	}
+
+	__l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end);
+	__l2c210_cache_sync(base);
+}
+
+static void l2c310_flush_range_erratum(unsigned long start, unsigned long end)
+{
+	raw_spinlock_t *lock = &l2x0_lock;
+	unsigned long flags;
+	void __iomem *base = l2x0_base;
+
+	raw_spin_lock_irqsave(lock, flags);
+	while (start < end) {
+		unsigned long blk_end = start + min(end - start, 4096UL);
+
+		l2c_set_debug(base, 0x03);
+		while (start < blk_end) {
+			writel_relaxed(start, base + L2X0_CLEAN_LINE_PA);
+			writel_relaxed(start, base + L2X0_INV_LINE_PA);
+			start += CACHE_LINE_SIZE;
+		}
+		l2c_set_debug(base, 0x00);
+
+		if (blk_end < end) {
+			raw_spin_unlock_irqrestore(lock, flags);
+			raw_spin_lock_irqsave(lock, flags);
+		}
+	}
+	raw_spin_unlock_irqrestore(lock, flags);
+	__l2c210_cache_sync(base);
+}
+
+static void l2c310_flush_all_erratum(void)
+{
+	void __iomem *base = l2x0_base;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	l2c_set_debug(base, 0x03);
+	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+	l2c_set_debug(base, 0x00);
+	__l2c210_cache_sync(base);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void __init l2c310_save(void __iomem *base)
+{
+	unsigned revision;
+
+	l2c_save(base);
+
+	l2x0_saved_regs.tag_latency = readl_relaxed(base +
+		L310_TAG_LATENCY_CTRL);
+	l2x0_saved_regs.data_latency = readl_relaxed(base +
+		L310_DATA_LATENCY_CTRL);
+	l2x0_saved_regs.filter_end = readl_relaxed(base +
+		L310_ADDR_FILTER_END);
+	l2x0_saved_regs.filter_start = readl_relaxed(base +
+		L310_ADDR_FILTER_START);
+
+	revision = readl_relaxed(base + L2X0_CACHE_ID) &
+			L2X0_CACHE_ID_RTL_MASK;
+
+	/* From r2p0, there is Prefetch offset/control register */
+	if (revision >= L310_CACHE_ID_RTL_R2P0)
+		l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base +
+							L310_PREFETCH_CTRL);
+
+	/* From r3p0, there is Power control register */
+	if (revision >= L310_CACHE_ID_RTL_R3P0)
+		l2x0_saved_regs.pwr_ctrl = readl_relaxed(base +
+							L310_POWER_CTRL);
+}
+
+static void l2c310_resume(void)
+{
+	void __iomem *base = l2x0_base;
+
+	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
+		unsigned revision;
+
+		/* restore pl310 setup */
+		writel_relaxed(l2x0_saved_regs.tag_latency,
+			       base + L310_TAG_LATENCY_CTRL);
+		writel_relaxed(l2x0_saved_regs.data_latency,
+			       base + L310_DATA_LATENCY_CTRL);
+		writel_relaxed(l2x0_saved_regs.filter_end,
+			       base + L310_ADDR_FILTER_END);
+		writel_relaxed(l2x0_saved_regs.filter_start,
+			       base + L310_ADDR_FILTER_START);
+
+		revision = readl_relaxed(base + L2X0_CACHE_ID) &
+				L2X0_CACHE_ID_RTL_MASK;
+
+		if (revision >= L310_CACHE_ID_RTL_R2P0)
+			l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base,
+				      L310_PREFETCH_CTRL);
+		if (revision >= L310_CACHE_ID_RTL_R3P0)
+			l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base,
+				      L310_POWER_CTRL);
+
+		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
+
+		/* Re-enable full-line-of-zeros for Cortex-A9 */
+		if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
+			set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
+	}
+}
+
+static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, void *data)
+{
+	switch (act & ~CPU_TASKS_FROZEN) {
+	case CPU_STARTING:
+		set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
+		break;
+	case CPU_DYING:
+		set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1)));
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
+{
+	unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_PART_MASK;
+	bool cortex_a9 = read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9;
+
+	if (rev >= L310_CACHE_ID_RTL_R2P0) {
+		if (cortex_a9) {
+			aux |= L310_AUX_CTRL_EARLY_BRESP;
+			pr_info("L2C-310 enabling early BRESP for Cortex-A9\n");
+		} else if (aux & L310_AUX_CTRL_EARLY_BRESP) {
+			pr_warn("L2C-310 early BRESP only supported with Cortex-A9\n");
+			aux &= ~L310_AUX_CTRL_EARLY_BRESP;
+		}
+	}
+
+	if (cortex_a9) {
+		u32 aux_cur = readl_relaxed(base + L2X0_AUX_CTRL);
+		u32 acr = get_auxcr();
+
+		pr_debug("Cortex-A9 ACR=0x%08x\n", acr);
+
+		if (acr & BIT(3) && !(aux_cur & L310_AUX_CTRL_FULL_LINE_ZERO))
+			pr_err("L2C-310: full line of zeros enabled in Cortex-A9 but not L2C-310 - invalid\n");
+
+		if (aux & L310_AUX_CTRL_FULL_LINE_ZERO && !(acr & BIT(3)))
+			pr_err("L2C-310: enabling full line of zeros but not enabled in Cortex-A9\n");
+
+		if (!(aux & L310_AUX_CTRL_FULL_LINE_ZERO) && !outer_cache.write_sec) {
+			aux |= L310_AUX_CTRL_FULL_LINE_ZERO;
+			pr_info("L2C-310 full line of zeros enabled for Cortex-A9\n");
+		}
+	} else if (aux & (L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP)) {
+		pr_err("L2C-310: disabling Cortex-A9 specific feature bits\n");
+		aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP);
+	}
+
+	if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) {
+		u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL);
+
+		pr_info("L2C-310 %s%s prefetch enabled, offset %u lines\n",
+			aux & L310_AUX_CTRL_INSTR_PREFETCH ? "I" : "",
+			aux & L310_AUX_CTRL_DATA_PREFETCH ? "D" : "",
+			1 + (prefetch & L310_PREFETCH_CTRL_OFFSET_MASK));
+	}
+
+	/* r3p0 or later has power control register */
+	if (rev >= L310_CACHE_ID_RTL_R3P0) {
+		u32 power_ctrl;
+
+		l2c_write_sec(L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN,
+			      base, L310_POWER_CTRL);
+		power_ctrl = readl_relaxed(base + L310_POWER_CTRL);
+		pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n",
+			power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis",
+			power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis");
+	}
+
+	/*
+	 * Always enable non-secure access to the lockdown registers -
+	 * we write to them as part of the L2C enable sequence so they
+	 * need to be accessible.
+	 */
+	aux |= L310_AUX_CTRL_NS_LOCKDOWN;
+
+	l2c_enable(base, aux, num_lock);
+
+	if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) {
+		set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
+		cpu_notifier(l2c310_cpu_enable_flz, 0);
+	}
+}
+
+static void __init l2c310_fixup(void __iomem *base, u32 cache_id,
+	struct outer_cache_fns *fns)
+{
+	unsigned revision = cache_id & L2X0_CACHE_ID_RTL_MASK;
+	const char *errata[8];
+	unsigned n = 0;
+
+	if (IS_ENABLED(CONFIG_PL310_ERRATA_588369) &&
+	    revision < L310_CACHE_ID_RTL_R2P0 &&
+	    /* For bcm compatibility */
+	    fns->inv_range == l2c210_inv_range) {
+		fns->inv_range = l2c310_inv_range_erratum;
+		fns->flush_range = l2c310_flush_range_erratum;
+		errata[n++] = "588369";
+	}
+
+	if (IS_ENABLED(CONFIG_PL310_ERRATA_727915) &&
+	    revision >= L310_CACHE_ID_RTL_R2P0 &&
+	    revision < L310_CACHE_ID_RTL_R3P1) {
+		fns->flush_all = l2c310_flush_all_erratum;
+		errata[n++] = "727915";
+	}
+
+	if (revision >= L310_CACHE_ID_RTL_R3P0 &&
+	    revision < L310_CACHE_ID_RTL_R3P2) {
+		u32 val = readl_relaxed(base + L310_PREFETCH_CTRL);
+		/* I don't think bit23 is required here... but iMX6 does so */
+		if (val & (BIT(30) | BIT(23))) {
+			val &= ~(BIT(30) | BIT(23));
+			l2c_write_sec(val, base, L310_PREFETCH_CTRL);
+			errata[n++] = "752271";
+		}
+	}
+
+	if (IS_ENABLED(CONFIG_PL310_ERRATA_753970) &&
+	    revision == L310_CACHE_ID_RTL_R3P0) {
+		sync_reg_offset = L2X0_DUMMY_REG;
+		errata[n++] = "753970";
+	}
+
+	if (IS_ENABLED(CONFIG_PL310_ERRATA_769419))
+		errata[n++] = "769419";
+
+	if (n) {
+		unsigned i;
+
+		pr_info("L2C-310 errat%s", n > 1 ? "a" : "um");
+		for (i = 0; i < n; i++)
+			pr_cont(" %s", errata[i]);
+		pr_cont(" enabled\n");
+	}
+}
+
+static void l2c310_disable(void)
+{
+	/*
+	 * If full-line-of-zeros is enabled, we must first disable it in the
+	 * Cortex-A9 auxiliary control register before disabling the L2 cache.
+	 */
+	if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
+		set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1)));
+
+	l2c_disable();
+}
+
+static const struct l2c_init_data l2c310_init_fns __initconst = {
+	.type = "L2C-310",
+	.way_size_0 = SZ_8K,
+	.num_lock = 8,
+	.enable = l2c310_enable,
+	.fixup = l2c310_fixup,
+	.save = l2c310_save,
+	.outer_cache = {
+		.inv_range = l2c210_inv_range,
+		.clean_range = l2c210_clean_range,
+		.flush_range = l2c210_flush_range,
+		.flush_all = l2c210_flush_all,
+		.disable = l2c310_disable,
+		.sync = l2c210_sync,
+		.resume = l2c310_resume,
+	},
+};
+
+static void __init __l2c_init(const struct l2c_init_data *data,
+	u32 aux_val, u32 aux_mask, u32 cache_id)
+{
+	struct outer_cache_fns fns;
+	unsigned way_size_bits, ways;
+	u32 aux, old_aux;
+
+	/*
+	 * Sanity check the aux values.  aux_mask is the bits we preserve
+	 * from reading the hardware register, and aux_val is the bits we
+	 * set.
+	 */
+	if (aux_val & aux_mask)
+		pr_alert("L2C: platform provided aux values permit register corruption.\n");
+
+	old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
 	aux &= aux_mask;
 	aux |= aux_val;
 
+	if (old_aux != aux)
+		pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
+		        old_aux, aux);
+
 	/* Determine the number of ways */
 	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
 	case L2X0_CACHE_ID_PART_L310:
+		if ((aux_val | ~aux_mask) & (L2C_AUX_CTRL_WAY_SIZE_MASK | L310_AUX_CTRL_ASSOCIATIVITY_16))
+			pr_warn("L2C: DT/platform tries to modify or specify cache size\n");
 		if (aux & (1 << 16))
 			ways = 16;
 		else
 			ways = 8;
-		type = "L310";
-#ifdef CONFIG_PL310_ERRATA_753970
-		/* Unmapped register. */
-		sync_reg_offset = L2X0_DUMMY_REG;
-#endif
-		if ((cache_id & L2X0_CACHE_ID_RTL_MASK) <= L2X0_CACHE_ID_RTL_R3P0)
-			outer_cache.set_debug = pl310_set_debug;
 		break;
+
 	case L2X0_CACHE_ID_PART_L210:
+	case L2X0_CACHE_ID_PART_L220:
 		ways = (aux >> 13) & 0xf;
-		type = "L210";
 		break;
 
 	case AURORA_CACHE_ID:
-		sync_reg_offset = AURORA_SYNC_REG;
 		ways = (aux >> 13) & 0xf;
 		ways = 2 << ((ways + 1) >> 2);
-		way_size_shift = AURORA_WAY_SIZE_SHIFT;
-		type = "Aurora";
 		break;
+
 	default:
 		/* Assume unknown chips have 8 ways */
 		ways = 8;
-		type = "L2x0 series";
 		break;
 	}
 
 	l2x0_way_mask = (1 << ways) - 1;
 
 	/*
-	 * L2 cache Size =  Way size * Number of ways
+	 * way_size_0 is the size that a way_size value of zero would be
+	 * given the calculation: way_size = way_size_0 << way_size_bits.
+	 * So, if way_size_bits=0 is reserved, but way_size_bits=1 is 16k,
+	 * then way_size_0 would be 8k.
+	 *
+	 * L2 cache size = number of ways * way size.
 	 */
-	way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17;
-	way_size = 1 << (way_size + way_size_shift);
+	way_size_bits = (aux & L2C_AUX_CTRL_WAY_SIZE_MASK) >>
+			L2C_AUX_CTRL_WAY_SIZE_SHIFT;
+	l2x0_size = ways * (data->way_size_0 << way_size_bits);
 
-	l2x0_size = ways * way_size * SZ_1K;
+	fns = data->outer_cache;
+	fns.write_sec = outer_cache.write_sec;
+	if (data->fixup)
+		data->fixup(l2x0_base, cache_id, &fns);
 
 	/*
-	 * Check if l2x0 controller is already enabled.
-	 * If you are booting from non-secure mode
-	 * accessing the below registers will fault.
+	 * Check if l2x0 controller is already enabled.  If we are booting
+	 * in non-secure mode accessing the below registers will fault.
 	 */
-	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
-		/* Make sure that I&D is not locked down when starting */
-		l2x0_unlock(cache_id);
+	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
+		data->enable(l2x0_base, aux, data->num_lock);
 
-		/* l2x0 controller is disabled */
-		writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL);
+	outer_cache = fns;
 
-		l2x0_inv_all();
-
-		/* enable L2X0 */
-		writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL);
-	}
+	/*
+	 * It is strange to save the register state before initialisation,
+	 * but hey, this is what the DT implementations decided to do.
+	 */
+	if (data->save)
+		data->save(l2x0_base);
 
 	/* Re-read it in case some bits are reserved. */
 	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
 
-	/* Save the value for resuming. */
-	l2x0_saved_regs.aux_ctrl = aux;
+	pr_info("%s cache controller enabled, %d ways, %d kB\n",
+		data->type, ways, l2x0_size >> 10);
+	pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n",
+		data->type, cache_id, aux);
+}
 
-	if (!of_init) {
-		outer_cache.inv_range = l2x0_inv_range;
-		outer_cache.clean_range = l2x0_clean_range;
-		outer_cache.flush_range = l2x0_flush_range;
-		outer_cache.sync = l2x0_cache_sync;
-		outer_cache.flush_all = l2x0_flush_all;
-		outer_cache.inv_all = l2x0_inv_all;
-		outer_cache.disable = l2x0_disable;
+void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
+{
+	const struct l2c_init_data *data;
+	u32 cache_id;
+
+	l2x0_base = base;
+
+	cache_id = readl_relaxed(base + L2X0_CACHE_ID);
+
+	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
+	default:
+	case L2X0_CACHE_ID_PART_L210:
+		data = &l2c210_data;
+		break;
+
+	case L2X0_CACHE_ID_PART_L220:
+		data = &l2c220_data;
+		break;
+
+	case L2X0_CACHE_ID_PART_L310:
+		data = &l2c310_init_fns;
+		break;
 	}
 
-	pr_info("%s cache controller enabled\n", type);
-	pr_info("l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d kB\n",
-		ways, cache_id, aux, l2x0_size >> 10);
+	__l2c_init(data, aux_val, aux_mask, cache_id);
 }
 
 #ifdef CONFIG_OF
 static int l2_wt_override;
 
+/* Aurora don't have the cache ID register available, so we have to
+ * pass it though the device tree */
+static u32 cache_id_part_number_from_dt;
+
+static void __init l2x0_of_parse(const struct device_node *np,
+				 u32 *aux_val, u32 *aux_mask)
+{
+	u32 data[2] = { 0, 0 };
+	u32 tag = 0;
+	u32 dirty = 0;
+	u32 val = 0, mask = 0;
+
+	of_property_read_u32(np, "arm,tag-latency", &tag);
+	if (tag) {
+		mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK;
+		val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
+	}
+
+	of_property_read_u32_array(np, "arm,data-latency",
+				   data, ARRAY_SIZE(data));
+	if (data[0] && data[1]) {
+		mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK |
+			L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
+		val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) |
+		       ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT);
+	}
+
+	of_property_read_u32(np, "arm,dirty-latency", &dirty);
+	if (dirty) {
+		mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
+		val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
+	}
+
+	*aux_val &= ~mask;
+	*aux_val |= val;
+	*aux_mask &= ~mask;
+}
+
+static const struct l2c_init_data of_l2c210_data __initconst = {
+	.type = "L2C-210",
+	.way_size_0 = SZ_8K,
+	.num_lock = 1,
+	.of_parse = l2x0_of_parse,
+	.enable = l2c_enable,
+	.save = l2c_save,
+	.outer_cache = {
+		.inv_range   = l2c210_inv_range,
+		.clean_range = l2c210_clean_range,
+		.flush_range = l2c210_flush_range,
+		.flush_all   = l2c210_flush_all,
+		.disable     = l2c_disable,
+		.sync        = l2c210_sync,
+		.resume      = l2c210_resume,
+	},
+};
+
+static const struct l2c_init_data of_l2c220_data __initconst = {
+	.type = "L2C-220",
+	.way_size_0 = SZ_8K,
+	.num_lock = 1,
+	.of_parse = l2x0_of_parse,
+	.enable = l2c220_enable,
+	.save = l2c_save,
+	.outer_cache = {
+		.inv_range   = l2c220_inv_range,
+		.clean_range = l2c220_clean_range,
+		.flush_range = l2c220_flush_range,
+		.flush_all   = l2c220_flush_all,
+		.disable     = l2c_disable,
+		.sync        = l2c220_sync,
+		.resume      = l2c210_resume,
+	},
+};
+
+static void __init l2c310_of_parse(const struct device_node *np,
+	u32 *aux_val, u32 *aux_mask)
+{
+	u32 data[3] = { 0, 0, 0 };
+	u32 tag[3] = { 0, 0, 0 };
+	u32 filter[2] = { 0, 0 };
+
+	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
+	if (tag[0] && tag[1] && tag[2])
+		writel_relaxed(
+			L310_LATENCY_CTRL_RD(tag[0] - 1) |
+			L310_LATENCY_CTRL_WR(tag[1] - 1) |
+			L310_LATENCY_CTRL_SETUP(tag[2] - 1),
+			l2x0_base + L310_TAG_LATENCY_CTRL);
+
+	of_property_read_u32_array(np, "arm,data-latency",
+				   data, ARRAY_SIZE(data));
+	if (data[0] && data[1] && data[2])
+		writel_relaxed(
+			L310_LATENCY_CTRL_RD(data[0] - 1) |
+			L310_LATENCY_CTRL_WR(data[1] - 1) |
+			L310_LATENCY_CTRL_SETUP(data[2] - 1),
+			l2x0_base + L310_DATA_LATENCY_CTRL);
+
+	of_property_read_u32_array(np, "arm,filter-ranges",
+				   filter, ARRAY_SIZE(filter));
+	if (filter[1]) {
+		writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M),
+			       l2x0_base + L310_ADDR_FILTER_END);
+		writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN,
+			       l2x0_base + L310_ADDR_FILTER_START);
+	}
+}
+
+static const struct l2c_init_data of_l2c310_data __initconst = {
+	.type = "L2C-310",
+	.way_size_0 = SZ_8K,
+	.num_lock = 8,
+	.of_parse = l2c310_of_parse,
+	.enable = l2c310_enable,
+	.fixup = l2c310_fixup,
+	.save  = l2c310_save,
+	.outer_cache = {
+		.inv_range   = l2c210_inv_range,
+		.clean_range = l2c210_clean_range,
+		.flush_range = l2c210_flush_range,
+		.flush_all   = l2c210_flush_all,
+		.disable     = l2c310_disable,
+		.sync        = l2c210_sync,
+		.resume      = l2c310_resume,
+	},
+};
+
 /*
  * Note that the end addresses passed to Linux primitives are
  * noninclusive, while the hardware cache range operations use
@@ -524,6 +1166,100 @@
 	}
 }
 
+static void aurora_save(void __iomem *base)
+{
+	l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
+	l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
+}
+
+static void aurora_resume(void)
+{
+	void __iomem *base = l2x0_base;
+
+	if (!(readl(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
+		writel_relaxed(l2x0_saved_regs.aux_ctrl, base + L2X0_AUX_CTRL);
+		writel_relaxed(l2x0_saved_regs.ctrl, base + L2X0_CTRL);
+	}
+}
+
+/*
+ * For Aurora cache in no outer mode, enable via the CP15 coprocessor
+ * broadcasting of cache commands to L2.
+ */
+static void __init aurora_enable_no_outer(void __iomem *base, u32 aux,
+	unsigned num_lock)
+{
+	u32 u;
+
+	asm volatile("mrc p15, 1, %0, c15, c2, 0" : "=r" (u));
+	u |= AURORA_CTRL_FW;		/* Set the FW bit */
+	asm volatile("mcr p15, 1, %0, c15, c2, 0" : : "r" (u));
+
+	isb();
+
+	l2c_enable(base, aux, num_lock);
+}
+
+static void __init aurora_fixup(void __iomem *base, u32 cache_id,
+	struct outer_cache_fns *fns)
+{
+	sync_reg_offset = AURORA_SYNC_REG;
+}
+
+static void __init aurora_of_parse(const struct device_node *np,
+				u32 *aux_val, u32 *aux_mask)
+{
+	u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU;
+	u32 mask =  AURORA_ACR_REPLACEMENT_MASK;
+
+	of_property_read_u32(np, "cache-id-part",
+			&cache_id_part_number_from_dt);
+
+	/* Determine and save the write policy */
+	l2_wt_override = of_property_read_bool(np, "wt-override");
+
+	if (l2_wt_override) {
+		val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY;
+		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
+	}
+
+	*aux_val &= ~mask;
+	*aux_val |= val;
+	*aux_mask &= ~mask;
+}
+
+static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
+	.type = "Aurora",
+	.way_size_0 = SZ_4K,
+	.num_lock = 4,
+	.of_parse = aurora_of_parse,
+	.enable = l2c_enable,
+	.fixup = aurora_fixup,
+	.save  = aurora_save,
+	.outer_cache = {
+		.inv_range   = aurora_inv_range,
+		.clean_range = aurora_clean_range,
+		.flush_range = aurora_flush_range,
+		.flush_all   = l2x0_flush_all,
+		.disable     = l2x0_disable,
+		.sync        = l2x0_cache_sync,
+		.resume      = aurora_resume,
+	},
+};
+
+static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
+	.type = "Aurora",
+	.way_size_0 = SZ_4K,
+	.num_lock = 4,
+	.of_parse = aurora_of_parse,
+	.enable = aurora_enable_no_outer,
+	.fixup = aurora_fixup,
+	.save  = aurora_save,
+	.outer_cache = {
+		.resume      = aurora_resume,
+	},
+};
+
 /*
  * For certain Broadcom SoCs, depending on the address range, different offsets
  * need to be added to the address before passing it to L2 for
@@ -588,16 +1324,16 @@
 
 	/* normal case, no cross section between start and end */
 	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
-		l2x0_inv_range(new_start, new_end);
+		l2c210_inv_range(new_start, new_end);
 		return;
 	}
 
 	/* They cross sections, so it can only be a cross from section
 	 * 2 to section 3
 	 */
-	l2x0_inv_range(new_start,
+	l2c210_inv_range(new_start,
 		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
-	l2x0_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
+	l2c210_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
 		new_end);
 }
 
@@ -610,26 +1346,21 @@
 	if (unlikely(end <= start))
 		return;
 
-	if ((end - start) >= l2x0_size) {
-		l2x0_clean_all();
-		return;
-	}
-
 	new_start = bcm_l2_phys_addr(start);
 	new_end = bcm_l2_phys_addr(end);
 
 	/* normal case, no cross section between start and end */
 	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
-		l2x0_clean_range(new_start, new_end);
+		l2c210_clean_range(new_start, new_end);
 		return;
 	}
 
 	/* They cross sections, so it can only be a cross from section
 	 * 2 to section 3
 	 */
-	l2x0_clean_range(new_start,
+	l2c210_clean_range(new_start,
 		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
-	l2x0_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
+	l2c210_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
 		new_end);
 }
 
@@ -643,7 +1374,7 @@
 		return;
 
 	if ((end - start) >= l2x0_size) {
-		l2x0_flush_all();
+		outer_cache.flush_all();
 		return;
 	}
 
@@ -652,283 +1383,67 @@
 
 	/* normal case, no cross section between start and end */
 	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
-		l2x0_flush_range(new_start, new_end);
+		l2c210_flush_range(new_start, new_end);
 		return;
 	}
 
 	/* They cross sections, so it can only be a cross from section
 	 * 2 to section 3
 	 */
-	l2x0_flush_range(new_start,
+	l2c210_flush_range(new_start,
 		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
-	l2x0_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
+	l2c210_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
 		new_end);
 }
 
-static void __init l2x0_of_setup(const struct device_node *np,
-				 u32 *aux_val, u32 *aux_mask)
+/* Broadcom L2C-310 start from ARMs R3P2 or later, and require no fixups */
+static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
+	.type = "BCM-L2C-310",
+	.way_size_0 = SZ_8K,
+	.num_lock = 8,
+	.of_parse = l2c310_of_parse,
+	.enable = l2c310_enable,
+	.save  = l2c310_save,
+	.outer_cache = {
+		.inv_range   = bcm_inv_range,
+		.clean_range = bcm_clean_range,
+		.flush_range = bcm_flush_range,
+		.flush_all   = l2c210_flush_all,
+		.disable     = l2c310_disable,
+		.sync        = l2c210_sync,
+		.resume      = l2c310_resume,
+	},
+};
+
+static void __init tauros3_save(void __iomem *base)
 {
-	u32 data[2] = { 0, 0 };
-	u32 tag = 0;
-	u32 dirty = 0;
-	u32 val = 0, mask = 0;
+	l2c_save(base);
 
-	of_property_read_u32(np, "arm,tag-latency", &tag);
-	if (tag) {
-		mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK;
-		val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
-	}
-
-	of_property_read_u32_array(np, "arm,data-latency",
-				   data, ARRAY_SIZE(data));
-	if (data[0] && data[1]) {
-		mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK |
-			L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
-		val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) |
-		       ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT);
-	}
-
-	of_property_read_u32(np, "arm,dirty-latency", &dirty);
-	if (dirty) {
-		mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
-		val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
-	}
-
-	*aux_val &= ~mask;
-	*aux_val |= val;
-	*aux_mask &= ~mask;
-}
-
-static void __init pl310_of_setup(const struct device_node *np,
-				  u32 *aux_val, u32 *aux_mask)
-{
-	u32 data[3] = { 0, 0, 0 };
-	u32 tag[3] = { 0, 0, 0 };
-	u32 filter[2] = { 0, 0 };
-
-	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
-	if (tag[0] && tag[1] && tag[2])
-		writel_relaxed(
-			((tag[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
-			((tag[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
-			((tag[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
-			l2x0_base + L2X0_TAG_LATENCY_CTRL);
-
-	of_property_read_u32_array(np, "arm,data-latency",
-				   data, ARRAY_SIZE(data));
-	if (data[0] && data[1] && data[2])
-		writel_relaxed(
-			((data[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
-			((data[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
-			((data[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
-			l2x0_base + L2X0_DATA_LATENCY_CTRL);
-
-	of_property_read_u32_array(np, "arm,filter-ranges",
-				   filter, ARRAY_SIZE(filter));
-	if (filter[1]) {
-		writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M),
-			       l2x0_base + L2X0_ADDR_FILTER_END);
-		writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L2X0_ADDR_FILTER_EN,
-			       l2x0_base + L2X0_ADDR_FILTER_START);
-	}
-}
-
-static void __init pl310_save(void)
-{
-	u32 l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) &
-		L2X0_CACHE_ID_RTL_MASK;
-
-	l2x0_saved_regs.tag_latency = readl_relaxed(l2x0_base +
-		L2X0_TAG_LATENCY_CTRL);
-	l2x0_saved_regs.data_latency = readl_relaxed(l2x0_base +
-		L2X0_DATA_LATENCY_CTRL);
-	l2x0_saved_regs.filter_end = readl_relaxed(l2x0_base +
-		L2X0_ADDR_FILTER_END);
-	l2x0_saved_regs.filter_start = readl_relaxed(l2x0_base +
-		L2X0_ADDR_FILTER_START);
-
-	if (l2x0_revision >= L2X0_CACHE_ID_RTL_R2P0) {
-		/*
-		 * From r2p0, there is Prefetch offset/control register
-		 */
-		l2x0_saved_regs.prefetch_ctrl = readl_relaxed(l2x0_base +
-			L2X0_PREFETCH_CTRL);
-		/*
-		 * From r3p0, there is Power control register
-		 */
-		if (l2x0_revision >= L2X0_CACHE_ID_RTL_R3P0)
-			l2x0_saved_regs.pwr_ctrl = readl_relaxed(l2x0_base +
-				L2X0_POWER_CTRL);
-	}
-}
-
-static void aurora_save(void)
-{
-	l2x0_saved_regs.ctrl = readl_relaxed(l2x0_base + L2X0_CTRL);
-	l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
-}
-
-static void __init tauros3_save(void)
-{
 	l2x0_saved_regs.aux2_ctrl =
-		readl_relaxed(l2x0_base + TAUROS3_AUX2_CTRL);
+		readl_relaxed(base + TAUROS3_AUX2_CTRL);
 	l2x0_saved_regs.prefetch_ctrl =
-		readl_relaxed(l2x0_base + L2X0_PREFETCH_CTRL);
-}
-
-static void l2x0_resume(void)
-{
-	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
-		/* restore aux ctrl and enable l2 */
-		l2x0_unlock(readl_relaxed(l2x0_base + L2X0_CACHE_ID));
-
-		writel_relaxed(l2x0_saved_regs.aux_ctrl, l2x0_base +
-			L2X0_AUX_CTRL);
-
-		l2x0_inv_all();
-
-		writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL);
-	}
-}
-
-static void pl310_resume(void)
-{
-	u32 l2x0_revision;
-
-	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
-		/* restore pl310 setup */
-		writel_relaxed(l2x0_saved_regs.tag_latency,
-			l2x0_base + L2X0_TAG_LATENCY_CTRL);
-		writel_relaxed(l2x0_saved_regs.data_latency,
-			l2x0_base + L2X0_DATA_LATENCY_CTRL);
-		writel_relaxed(l2x0_saved_regs.filter_end,
-			l2x0_base + L2X0_ADDR_FILTER_END);
-		writel_relaxed(l2x0_saved_regs.filter_start,
-			l2x0_base + L2X0_ADDR_FILTER_START);
-
-		l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) &
-			L2X0_CACHE_ID_RTL_MASK;
-
-		if (l2x0_revision >= L2X0_CACHE_ID_RTL_R2P0) {
-			writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
-				l2x0_base + L2X0_PREFETCH_CTRL);
-			if (l2x0_revision >= L2X0_CACHE_ID_RTL_R3P0)
-				writel_relaxed(l2x0_saved_regs.pwr_ctrl,
-					l2x0_base + L2X0_POWER_CTRL);
-		}
-	}
-
-	l2x0_resume();
-}
-
-static void aurora_resume(void)
-{
-	if (!(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
-		writel_relaxed(l2x0_saved_regs.aux_ctrl,
-				l2x0_base + L2X0_AUX_CTRL);
-		writel_relaxed(l2x0_saved_regs.ctrl, l2x0_base + L2X0_CTRL);
-	}
+		readl_relaxed(base + L310_PREFETCH_CTRL);
 }
 
 static void tauros3_resume(void)
 {
-	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
+	void __iomem *base = l2x0_base;
+
+	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
 		writel_relaxed(l2x0_saved_regs.aux2_ctrl,
-			       l2x0_base + TAUROS3_AUX2_CTRL);
+			       base + TAUROS3_AUX2_CTRL);
 		writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
-			       l2x0_base + L2X0_PREFETCH_CTRL);
+			       base + L310_PREFETCH_CTRL);
+
+		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
 	}
-
-	l2x0_resume();
 }
 
-static void __init aurora_broadcast_l2_commands(void)
-{
-	__u32 u;
-	/* Enable Broadcasting of cache commands to L2*/
-	__asm__ __volatile__("mrc p15, 1, %0, c15, c2, 0" : "=r"(u));
-	u |= AURORA_CTRL_FW;		/* Set the FW bit */
-	__asm__ __volatile__("mcr p15, 1, %0, c15, c2, 0\n" : : "r"(u));
-	isb();
-}
-
-static void __init aurora_of_setup(const struct device_node *np,
-				u32 *aux_val, u32 *aux_mask)
-{
-	u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU;
-	u32 mask =  AURORA_ACR_REPLACEMENT_MASK;
-
-	of_property_read_u32(np, "cache-id-part",
-			&cache_id_part_number_from_dt);
-
-	/* Determine and save the write policy */
-	l2_wt_override = of_property_read_bool(np, "wt-override");
-
-	if (l2_wt_override) {
-		val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY;
-		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
-	}
-
-	*aux_val &= ~mask;
-	*aux_val |= val;
-	*aux_mask &= ~mask;
-}
-
-static const struct l2x0_of_data pl310_data = {
-	.setup = pl310_of_setup,
-	.save  = pl310_save,
-	.outer_cache = {
-		.resume      = pl310_resume,
-		.inv_range   = l2x0_inv_range,
-		.clean_range = l2x0_clean_range,
-		.flush_range = l2x0_flush_range,
-		.sync        = l2x0_cache_sync,
-		.flush_all   = l2x0_flush_all,
-		.inv_all     = l2x0_inv_all,
-		.disable     = l2x0_disable,
-	},
-};
-
-static const struct l2x0_of_data l2x0_data = {
-	.setup = l2x0_of_setup,
-	.save  = NULL,
-	.outer_cache = {
-		.resume      = l2x0_resume,
-		.inv_range   = l2x0_inv_range,
-		.clean_range = l2x0_clean_range,
-		.flush_range = l2x0_flush_range,
-		.sync        = l2x0_cache_sync,
-		.flush_all   = l2x0_flush_all,
-		.inv_all     = l2x0_inv_all,
-		.disable     = l2x0_disable,
-	},
-};
-
-static const struct l2x0_of_data aurora_with_outer_data = {
-	.setup = aurora_of_setup,
-	.save  = aurora_save,
-	.outer_cache = {
-		.resume      = aurora_resume,
-		.inv_range   = aurora_inv_range,
-		.clean_range = aurora_clean_range,
-		.flush_range = aurora_flush_range,
-		.sync        = l2x0_cache_sync,
-		.flush_all   = l2x0_flush_all,
-		.inv_all     = l2x0_inv_all,
-		.disable     = l2x0_disable,
-	},
-};
-
-static const struct l2x0_of_data aurora_no_outer_data = {
-	.setup = aurora_of_setup,
-	.save  = aurora_save,
-	.outer_cache = {
-		.resume      = aurora_resume,
-	},
-};
-
-static const struct l2x0_of_data tauros3_data = {
-	.setup = NULL,
+static const struct l2c_init_data of_tauros3_data __initconst = {
+	.type = "Tauros3",
+	.way_size_0 = SZ_8K,
+	.num_lock = 8,
+	.enable = l2c_enable,
 	.save  = tauros3_save,
 	/* Tauros3 broadcasts L1 cache operations to L2 */
 	.outer_cache = {
@@ -936,43 +1451,26 @@
 	},
 };
 
-static const struct l2x0_of_data bcm_l2x0_data = {
-	.setup = pl310_of_setup,
-	.save  = pl310_save,
-	.outer_cache = {
-		.resume      = pl310_resume,
-		.inv_range   = bcm_inv_range,
-		.clean_range = bcm_clean_range,
-		.flush_range = bcm_flush_range,
-		.sync        = l2x0_cache_sync,
-		.flush_all   = l2x0_flush_all,
-		.inv_all     = l2x0_inv_all,
-		.disable     = l2x0_disable,
-	},
-};
-
+#define L2C_ID(name, fns) { .compatible = name, .data = (void *)&fns }
 static const struct of_device_id l2x0_ids[] __initconst = {
-	{ .compatible = "arm,l210-cache", .data = (void *)&l2x0_data },
-	{ .compatible = "arm,l220-cache", .data = (void *)&l2x0_data },
-	{ .compatible = "arm,pl310-cache", .data = (void *)&pl310_data },
-	{ .compatible = "bcm,bcm11351-a2-pl310-cache", /* deprecated name */
-	  .data = (void *)&bcm_l2x0_data},
-	{ .compatible = "brcm,bcm11351-a2-pl310-cache",
-	  .data = (void *)&bcm_l2x0_data},
-	{ .compatible = "marvell,aurora-outer-cache",
-	  .data = (void *)&aurora_with_outer_data},
-	{ .compatible = "marvell,aurora-system-cache",
-	  .data = (void *)&aurora_no_outer_data},
-	{ .compatible = "marvell,tauros3-cache",
-	  .data = (void *)&tauros3_data },
+	L2C_ID("arm,l210-cache", of_l2c210_data),
+	L2C_ID("arm,l220-cache", of_l2c220_data),
+	L2C_ID("arm,pl310-cache", of_l2c310_data),
+	L2C_ID("brcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
+	L2C_ID("marvell,aurora-outer-cache", of_aurora_with_outer_data),
+	L2C_ID("marvell,aurora-system-cache", of_aurora_no_outer_data),
+	L2C_ID("marvell,tauros3-cache", of_tauros3_data),
+	/* Deprecated IDs */
+	L2C_ID("bcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
 	{}
 };
 
 int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
 {
+	const struct l2c_init_data *data;
 	struct device_node *np;
-	const struct l2x0_of_data *data;
 	struct resource res;
+	u32 cache_id, old_aux;
 
 	np = of_find_matching_node(NULL, l2x0_ids);
 	if (!np)
@@ -989,23 +1487,29 @@
 
 	data = of_match_node(l2x0_ids, np)->data;
 
-	/* L2 configuration can only be changed if the cache is disabled */
-	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
-		if (data->setup)
-			data->setup(np, &aux_val, &aux_mask);
-
-		/* For aurora cache in no outer mode select the
-		 * correct mode using the coprocessor*/
-		if (data == &aurora_no_outer_data)
-			aurora_broadcast_l2_commands();
+	old_aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
+	if (old_aux != ((old_aux & aux_mask) | aux_val)) {
+		pr_warn("L2C: platform modifies aux control register: 0x%08x -> 0x%08x\n",
+		        old_aux, (old_aux & aux_mask) | aux_val);
+	} else if (aux_mask != ~0U && aux_val != 0) {
+		pr_alert("L2C: platform provided aux values match the hardware, so have no effect.  Please remove them.\n");
 	}
 
-	if (data->save)
-		data->save();
+	/* All L2 caches are unified, so this property should be specified */
+	if (!of_property_read_bool(np, "cache-unified"))
+		pr_err("L2C: device tree omits to specify unified cache\n");
 
-	of_init = true;
-	memcpy(&outer_cache, &data->outer_cache, sizeof(outer_cache));
-	l2x0_init(l2x0_base, aux_val, aux_mask);
+	/* L2 configuration can only be changed if the cache is disabled */
+	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
+		if (data->of_parse)
+			data->of_parse(np, &aux_val, &aux_mask);
+
+	if (cache_id_part_number_from_dt)
+		cache_id = cache_id_part_number_from_dt;
+	else
+		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
+
+	__l2c_init(data, aux_val, aux_mask, cache_id);
 
 	return 0;
 }
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 778bcf8..615c99e 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -59,7 +59,7 @@
        bgt     2b
        cmp     r2, #0
        bgt     1b
-       dsb
+       dsb     st
        isb
        mov     pc, lr
 ENDPROC(v7_invalidate_l1)
@@ -166,7 +166,7 @@
 finished:
 	mov	r10, #0				@ swith back to cache level 0
 	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
-	dsb
+	dsb	st
 	isb
 	mov	pc, lr
 ENDPROC(v7_flush_dcache_all)
@@ -335,7 +335,7 @@
 	add	r0, r0, r2
 	cmp	r0, r1
 	blo	1b
-	dsb
+	dsb	st
 	mov	pc, lr
 ENDPROC(v7_flush_kern_dcache_area)
 
@@ -368,7 +368,7 @@
 	add	r0, r0, r2
 	cmp	r0, r1
 	blo	1b
-	dsb
+	dsb	st
 	mov	pc, lr
 ENDPROC(v7_dma_inv_range)
 
@@ -390,7 +390,7 @@
 	add	r0, r0, r2
 	cmp	r0, r1
 	blo	1b
-	dsb
+	dsb	st
 	mov	pc, lr
 ENDPROC(v7_dma_clean_range)
 
@@ -412,7 +412,7 @@
 	add	r0, r0, r2
 	cmp	r0, r1
 	blo	1b
-	dsb
+	dsb	st
 	mov	pc, lr
 ENDPROC(v7_dma_flush_range)
 
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 18e98df..85a109b 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -904,11 +904,12 @@
 	phys_addr_t paddr = page_to_phys(page) + off;
 
 	/* FIXME: non-speculating: not required */
-	/* don't bother invalidating if DMA to device */
-	if (dir != DMA_TO_DEVICE)
+	/* in any case, don't bother invalidating if DMA to device */
+	if (dir != DMA_TO_DEVICE) {
 		outer_inv_range(paddr, paddr + size);
 
-	dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
+		dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
+	}
 
 	/*
 	 * Mark the D-cache clean for these pages to avoid extra flushing.
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 3387e60..43d54f5 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -104,17 +104,20 @@
 #define flush_icache_alias(pfn,vaddr,len)	do { } while (0)
 #endif
 
+#define FLAG_PA_IS_EXEC 1
+#define FLAG_PA_CORE_IN_MM 2
+
 static void flush_ptrace_access_other(void *args)
 {
 	__flush_icache_all();
 }
 
-static
-void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
-			 unsigned long uaddr, void *kaddr, unsigned long len)
+static inline
+void __flush_ptrace_access(struct page *page, unsigned long uaddr, void *kaddr,
+			   unsigned long len, unsigned int flags)
 {
 	if (cache_is_vivt()) {
-		if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
+		if (flags & FLAG_PA_CORE_IN_MM) {
 			unsigned long addr = (unsigned long)kaddr;
 			__cpuc_coherent_kern_range(addr, addr + len);
 		}
@@ -128,7 +131,7 @@
 	}
 
 	/* VIPT non-aliasing D-cache */
-	if (vma->vm_flags & VM_EXEC) {
+	if (flags & FLAG_PA_IS_EXEC) {
 		unsigned long addr = (unsigned long)kaddr;
 		if (icache_is_vipt_aliasing())
 			flush_icache_alias(page_to_pfn(page), uaddr, len);
@@ -140,6 +143,26 @@
 	}
 }
 
+static
+void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
+			 unsigned long uaddr, void *kaddr, unsigned long len)
+{
+	unsigned int flags = 0;
+	if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)))
+		flags |= FLAG_PA_CORE_IN_MM;
+	if (vma->vm_flags & VM_EXEC)
+		flags |= FLAG_PA_IS_EXEC;
+	__flush_ptrace_access(page, uaddr, kaddr, len, flags);
+}
+
+void flush_uprobe_xol_access(struct page *page, unsigned long uaddr,
+			     void *kaddr, unsigned long len)
+{
+	unsigned int flags = FLAG_PA_CORE_IN_MM|FLAG_PA_IS_EXEC;
+
+	__flush_ptrace_access(page, uaddr, kaddr, len, flags);
+}
+
 /*
  * Copy user data from/to a page which is mapped into a different
  * processes address space.  Really, we want to allow our "user
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
index 21b9e1b..45aeaac 100644
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -18,6 +18,21 @@
 #include <asm/tlbflush.h>
 #include "mm.h"
 
+pte_t *fixmap_page_table;
+
+static inline void set_fixmap_pte(int idx, pte_t pte)
+{
+	unsigned long vaddr = __fix_to_virt(idx);
+	set_pte_ext(fixmap_page_table + idx, pte, 0);
+	local_flush_tlb_kernel_page(vaddr);
+}
+
+static inline pte_t get_fixmap_pte(unsigned long vaddr)
+{
+	unsigned long idx = __virt_to_fix(vaddr);
+	return *(fixmap_page_table + idx);
+}
+
 void *kmap(struct page *page)
 {
 	might_sleep();
@@ -63,20 +78,20 @@
 	type = kmap_atomic_idx_push();
 
 	idx = type + KM_TYPE_NR * smp_processor_id();
-	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+	vaddr = __fix_to_virt(idx);
 #ifdef CONFIG_DEBUG_HIGHMEM
 	/*
 	 * With debugging enabled, kunmap_atomic forces that entry to 0.
 	 * Make sure it was indeed properly unmapped.
 	 */
-	BUG_ON(!pte_none(get_top_pte(vaddr)));
+	BUG_ON(!pte_none(*(fixmap_page_table + idx)));
 #endif
 	/*
 	 * When debugging is off, kunmap_atomic leaves the previous mapping
 	 * in place, so the contained TLB flush ensures the TLB is updated
 	 * with the new mapping.
 	 */
-	set_top_pte(vaddr, mk_pte(page, kmap_prot));
+	set_fixmap_pte(idx, mk_pte(page, kmap_prot));
 
 	return (void *)vaddr;
 }
@@ -94,8 +109,8 @@
 		if (cache_is_vivt())
 			__cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
 #ifdef CONFIG_DEBUG_HIGHMEM
-		BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
-		set_top_pte(vaddr, __pte(0));
+		BUG_ON(vaddr != __fix_to_virt(idx));
+		set_fixmap_pte(idx, __pte(0));
 #else
 		(void) idx;  /* to kill a warning */
 #endif
@@ -117,11 +132,11 @@
 
 	type = kmap_atomic_idx_push();
 	idx = type + KM_TYPE_NR * smp_processor_id();
-	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+	vaddr = __fix_to_virt(idx);
 #ifdef CONFIG_DEBUG_HIGHMEM
-	BUG_ON(!pte_none(get_top_pte(vaddr)));
+	BUG_ON(!pte_none(*(fixmap_page_table + idx)));
 #endif
-	set_top_pte(vaddr, pfn_pte(pfn, kmap_prot));
+	set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
 
 	return (void *)vaddr;
 }
@@ -133,5 +148,5 @@
 	if (vaddr < FIXADDR_START)
 		return virt_to_page(ptr);
 
-	return pte_page(get_top_pte(vaddr));
+	return pte_page(get_fixmap_pte(vaddr));
 }
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 2a77ba8..5958ac0 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -23,6 +23,7 @@
 #include <linux/dma-contiguous.h>
 #include <linux/sizes.h>
 
+#include <asm/cp15.h>
 #include <asm/mach-types.h>
 #include <asm/memblock.h>
 #include <asm/prom.h>
@@ -36,6 +37,14 @@
 
 #include "mm.h"
 
+#ifdef CONFIG_CPU_CP15_MMU
+unsigned long __init __clear_cr(unsigned long mask)
+{
+	cr_alignment = cr_alignment & ~mask;
+	return cr_alignment;
+}
+#endif
+
 static phys_addr_t phys_initrd_start __initdata = 0;
 static unsigned long phys_initrd_size __initdata = 0;
 
@@ -81,24 +90,21 @@
  * initialization functions, as well as show_mem() for the skipping
  * of holes in the memory map.  It is populated by arm_add_memory().
  */
-struct meminfo meminfo;
-
 void show_mem(unsigned int filter)
 {
 	int free = 0, total = 0, reserved = 0;
-	int shared = 0, cached = 0, slab = 0, i;
-	struct meminfo * mi = &meminfo;
+	int shared = 0, cached = 0, slab = 0;
+	struct memblock_region *reg;
 
 	printk("Mem-info:\n");
 	show_free_areas(filter);
 
-	for_each_bank (i, mi) {
-		struct membank *bank = &mi->bank[i];
+	for_each_memblock (memory, reg) {
 		unsigned int pfn1, pfn2;
 		struct page *page, *end;
 
-		pfn1 = bank_pfn_start(bank);
-		pfn2 = bank_pfn_end(bank);
+		pfn1 = memblock_region_memory_base_pfn(reg);
+		pfn2 = memblock_region_memory_end_pfn(reg);
 
 		page = pfn_to_page(pfn1);
 		end  = pfn_to_page(pfn2 - 1) + 1;
@@ -115,8 +121,9 @@
 				free++;
 			else
 				shared += page_count(page) - 1;
-			page++;
-		} while (page < end);
+			pfn1++;
+			page = pfn_to_page(pfn1);
+		} while (pfn1 < pfn2);
 	}
 
 	printk("%d pages of RAM\n", total);
@@ -130,16 +137,9 @@
 static void __init find_limits(unsigned long *min, unsigned long *max_low,
 			       unsigned long *max_high)
 {
-	struct meminfo *mi = &meminfo;
-	int i;
-
-	/* This assumes the meminfo array is properly sorted */
-	*min = bank_pfn_start(&mi->bank[0]);
-	for_each_bank (i, mi)
-		if (mi->bank[i].highmem)
-				break;
-	*max_low = bank_pfn_end(&mi->bank[i - 1]);
-	*max_high = bank_pfn_end(&mi->bank[mi->nr_banks - 1]);
+	*max_low = PFN_DOWN(memblock_get_current_limit());
+	*min = PFN_UP(memblock_start_of_DRAM());
+	*max_high = PFN_DOWN(memblock_end_of_DRAM());
 }
 
 #ifdef CONFIG_ZONE_DMA
@@ -274,14 +274,8 @@
 	return phys;
 }
 
-void __init arm_memblock_init(struct meminfo *mi,
-	const struct machine_desc *mdesc)
+void __init arm_memblock_init(const struct machine_desc *mdesc)
 {
-	int i;
-
-	for (i = 0; i < mi->nr_banks; i++)
-		memblock_add(mi->bank[i].start, mi->bank[i].size);
-
 	/* Register the kernel text, kernel data and initrd with memblock. */
 #ifdef CONFIG_XIP_KERNEL
 	memblock_reserve(__pa(_sdata), _end - _sdata);
@@ -413,54 +407,53 @@
 /*
  * The mem_map array can get very big.  Free the unused area of the memory map.
  */
-static void __init free_unused_memmap(struct meminfo *mi)
+static void __init free_unused_memmap(void)
 {
-	unsigned long bank_start, prev_bank_end = 0;
-	unsigned int i;
+	unsigned long start, prev_end = 0;
+	struct memblock_region *reg;
 
 	/*
 	 * This relies on each bank being in address order.
 	 * The banks are sorted previously in bootmem_init().
 	 */
-	for_each_bank(i, mi) {
-		struct membank *bank = &mi->bank[i];
-
-		bank_start = bank_pfn_start(bank);
+	for_each_memblock(memory, reg) {
+		start = memblock_region_memory_base_pfn(reg);
 
 #ifdef CONFIG_SPARSEMEM
 		/*
 		 * Take care not to free memmap entries that don't exist
 		 * due to SPARSEMEM sections which aren't present.
 		 */
-		bank_start = min(bank_start,
-				 ALIGN(prev_bank_end, PAGES_PER_SECTION));
+		start = min(start,
+				 ALIGN(prev_end, PAGES_PER_SECTION));
 #else
 		/*
 		 * Align down here since the VM subsystem insists that the
 		 * memmap entries are valid from the bank start aligned to
 		 * MAX_ORDER_NR_PAGES.
 		 */
-		bank_start = round_down(bank_start, MAX_ORDER_NR_PAGES);
+		start = round_down(start, MAX_ORDER_NR_PAGES);
 #endif
 		/*
 		 * If we had a previous bank, and there is a space
 		 * between the current bank and the previous, free it.
 		 */
-		if (prev_bank_end && prev_bank_end < bank_start)
-			free_memmap(prev_bank_end, bank_start);
+		if (prev_end && prev_end < start)
+			free_memmap(prev_end, start);
 
 		/*
 		 * Align up here since the VM subsystem insists that the
 		 * memmap entries are valid from the bank end aligned to
 		 * MAX_ORDER_NR_PAGES.
 		 */
-		prev_bank_end = ALIGN(bank_pfn_end(bank), MAX_ORDER_NR_PAGES);
+		prev_end = ALIGN(memblock_region_memory_end_pfn(reg),
+				 MAX_ORDER_NR_PAGES);
 	}
 
 #ifdef CONFIG_SPARSEMEM
-	if (!IS_ALIGNED(prev_bank_end, PAGES_PER_SECTION))
-		free_memmap(prev_bank_end,
-			    ALIGN(prev_bank_end, PAGES_PER_SECTION));
+	if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION))
+		free_memmap(prev_end,
+			    ALIGN(prev_end, PAGES_PER_SECTION));
 #endif
 }
 
@@ -536,7 +529,7 @@
 	set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
 
 	/* this will put all unused low memory onto the freelists */
-	free_unused_memmap(&meminfo);
+	free_unused_memmap();
 	free_all_bootmem();
 
 #ifdef CONFIG_SA1111
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index f9c32ba..d1e5ad7 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -438,6 +438,13 @@
 EXPORT_SYMBOL(__arm_iounmap);
 
 #ifdef CONFIG_PCI
+static int pci_ioremap_mem_type = MT_DEVICE;
+
+void pci_ioremap_set_mem_type(int mem_type)
+{
+	pci_ioremap_mem_type = mem_type;
+}
+
 int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr)
 {
 	BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT);
@@ -445,7 +452,7 @@
 	return ioremap_page_range(PCI_IO_VIRT_BASE + offset,
 				  PCI_IO_VIRT_BASE + offset + SZ_64K,
 				  phys_addr,
-				  __pgprot(get_mem_type(MT_DEVICE)->prot_pte));
+				  __pgprot(get_mem_type(pci_ioremap_mem_type)->prot_pte));
 }
 EXPORT_SYMBOL_GPL(pci_ioremap_io);
 #endif
diff --git a/arch/arm/mm/l2c-common.c b/arch/arm/mm/l2c-common.c
new file mode 100644
index 0000000..10a3cf2
--- /dev/null
+++ b/arch/arm/mm/l2c-common.c
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2010 ARM Ltd.
+ * Written by Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/bug.h>
+#include <linux/smp.h>
+#include <asm/outercache.h>
+
+void outer_disable(void)
+{
+	WARN_ON(!irqs_disabled());
+	WARN_ON(num_online_cpus() > 1);
+
+	if (outer_cache.disable)
+		outer_cache.disable();
+}
diff --git a/arch/arm/mm/l2c-l2x0-resume.S b/arch/arm/mm/l2c-l2x0-resume.S
new file mode 100644
index 0000000..99b05f2
--- /dev/null
+++ b/arch/arm/mm/l2c-l2x0-resume.S
@@ -0,0 +1,58 @@
+/*
+ * L2C-310 early resume code.  This can be used by platforms to restore
+ * the settings of their L2 cache controller before restoring the
+ * processor state.
+ *
+ * This code can only be used to if you are running in the secure world.
+ */
+#include <linux/linkage.h>
+#include <asm/hardware/cache-l2x0.h>
+
+	.text
+
+ENTRY(l2c310_early_resume)
+	adr	r0, 1f
+	ldr	r2, [r0]
+	add	r0, r2, r0
+
+	ldmia	r0, {r1, r2, r3, r4, r5, r6, r7, r8}
+	@ r1 = phys address of L2C-310 controller
+	@ r2 = aux_ctrl
+	@ r3 = tag_latency
+	@ r4 = data_latency
+	@ r5 = filter_start
+	@ r6 = filter_end
+	@ r7 = prefetch_ctrl
+	@ r8 = pwr_ctrl
+
+	@ Check that the address has been initialised
+	teq	r1, #0
+	moveq	pc, lr
+
+	@ The prefetch and power control registers are revision dependent
+	@ and can be written whether or not the L2 cache is enabled
+	ldr	r0, [r1, #L2X0_CACHE_ID]
+	and	r0, r0, #L2X0_CACHE_ID_RTL_MASK
+	cmp	r0, #L310_CACHE_ID_RTL_R2P0
+	strcs	r7, [r1, #L310_PREFETCH_CTRL]
+	cmp	r0, #L310_CACHE_ID_RTL_R3P0
+	strcs	r8, [r1, #L310_POWER_CTRL]
+
+	@ Don't setup the L2 cache if it is already enabled
+	ldr	r0, [r1, #L2X0_CTRL]
+	tst	r0, #L2X0_CTRL_EN
+	movne	pc, lr
+
+	str	r3, [r1, #L310_TAG_LATENCY_CTRL]
+	str	r4, [r1, #L310_DATA_LATENCY_CTRL]
+	str	r6, [r1, #L310_ADDR_FILTER_END]
+	str	r5, [r1, #L310_ADDR_FILTER_START]
+
+	str	r2, [r1, #L2X0_AUX_CTRL]
+	mov	r9, #L2X0_CTRL_EN
+	str	r9, [r1, #L2X0_CTRL]
+	mov	pc, lr
+ENDPROC(l2c310_early_resume)
+
+	.align
+1:	.long	l2x0_saved_regs - .
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 7ea641b..ce727d4 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -2,6 +2,8 @@
 #include <linux/list.h>
 #include <linux/vmalloc.h>
 
+#include <asm/pgtable.h>
+
 /* the upper-most page table pointer */
 extern pmd_t *top_pmd;
 
@@ -93,3 +95,5 @@
 void __init bootmem_init(void);
 void arm_mm_memblock_reserve(void);
 void dma_contiguous_remap(void);
+
+unsigned long __clear_cr(unsigned long mask);
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index b68c6b2..ab14b79 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -35,6 +35,7 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <asm/mach/pci.h>
+#include <asm/fixmap.h>
 
 #include "mm.h"
 #include "tcm.h"
@@ -117,28 +118,54 @@
 };
 
 #ifdef CONFIG_CPU_CP15
+static unsigned long initial_pmd_value __initdata = 0;
+
 /*
- * These are useful for identifying cache coherency
- * problems by allowing the cache or the cache and
- * writebuffer to be turned off.  (Note: the write
- * buffer should not be on and the cache off).
+ * Initialise the cache_policy variable with the initial state specified
+ * via the "pmd" value.  This is used to ensure that on ARMv6 and later,
+ * the C code sets the page tables up with the same policy as the head
+ * assembly code, which avoids an illegal state where the TLBs can get
+ * confused.  See comments in early_cachepolicy() for more information.
+ */
+void __init init_default_cache_policy(unsigned long pmd)
+{
+	int i;
+
+	initial_pmd_value = pmd;
+
+	pmd &= PMD_SECT_TEX(1) | PMD_SECT_BUFFERABLE | PMD_SECT_CACHEABLE;
+
+	for (i = 0; i < ARRAY_SIZE(cache_policies); i++)
+		if (cache_policies[i].pmd == pmd) {
+			cachepolicy = i;
+			break;
+		}
+
+	if (i == ARRAY_SIZE(cache_policies))
+		pr_err("ERROR: could not find cache policy\n");
+}
+
+/*
+ * These are useful for identifying cache coherency problems by allowing
+ * the cache or the cache and writebuffer to be turned off.  (Note: the
+ * write buffer should not be on and the cache off).
  */
 static int __init early_cachepolicy(char *p)
 {
-	int i;
+	int i, selected = -1;
 
 	for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
 		int len = strlen(cache_policies[i].policy);
 
 		if (memcmp(p, cache_policies[i].policy, len) == 0) {
-			cachepolicy = i;
-			cr_alignment &= ~cache_policies[i].cr_mask;
-			cr_no_alignment &= ~cache_policies[i].cr_mask;
+			selected = i;
 			break;
 		}
 	}
-	if (i == ARRAY_SIZE(cache_policies))
-		printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
+
+	if (selected == -1)
+		pr_err("ERROR: unknown or unsupported cache policy\n");
+
 	/*
 	 * This restriction is partly to do with the way we boot; it is
 	 * unpredictable to have memory mapped using two different sets of
@@ -146,12 +173,18 @@
 	 * change these attributes once the initial assembly has setup the
 	 * page tables.
 	 */
-	if (cpu_architecture() >= CPU_ARCH_ARMv6) {
-		printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n");
-		cachepolicy = CPOLICY_WRITEBACK;
+	if (cpu_architecture() >= CPU_ARCH_ARMv6 && selected != cachepolicy) {
+		pr_warn("Only cachepolicy=%s supported on ARMv6 and later\n",
+			cache_policies[cachepolicy].policy);
+		return 0;
 	}
-	flush_cache_all();
-	set_cr(cr_alignment);
+
+	if (selected != cachepolicy) {
+		unsigned long cr = __clear_cr(cache_policies[selected].cr_mask);
+		cachepolicy = selected;
+		flush_cache_all();
+		set_cr(cr);
+	}
 	return 0;
 }
 early_param("cachepolicy", early_cachepolicy);
@@ -186,35 +219,6 @@
 early_param("ecc", early_ecc);
 #endif
 
-static int __init noalign_setup(char *__unused)
-{
-	cr_alignment &= ~CR_A;
-	cr_no_alignment &= ~CR_A;
-	set_cr(cr_alignment);
-	return 1;
-}
-__setup("noalign", noalign_setup);
-
-#ifndef CONFIG_SMP
-void adjust_cr(unsigned long mask, unsigned long set)
-{
-	unsigned long flags;
-
-	mask &= ~CR_A;
-
-	set &= mask;
-
-	local_irq_save(flags);
-
-	cr_no_alignment = (cr_no_alignment & ~mask) | set;
-	cr_alignment = (cr_alignment & ~mask) | set;
-
-	set_cr((get_cr() & ~mask) | set);
-
-	local_irq_restore(flags);
-}
-#endif
-
 #else /* ifdef CONFIG_CPU_CP15 */
 
 static int __init early_cachepolicy(char *p)
@@ -414,8 +418,17 @@
 			cachepolicy = CPOLICY_WRITEBACK;
 		ecc_mask = 0;
 	}
-	if (is_smp())
-		cachepolicy = CPOLICY_WRITEALLOC;
+
+	if (is_smp()) {
+		if (cachepolicy != CPOLICY_WRITEALLOC) {
+			pr_warn("Forcing write-allocate cache policy for SMP\n");
+			cachepolicy = CPOLICY_WRITEALLOC;
+		}
+		if (!(initial_pmd_value & PMD_SECT_S)) {
+			pr_warn("Forcing shared mappings for SMP\n");
+			initial_pmd_value |= PMD_SECT_S;
+		}
+	}
 
 	/*
 	 * Strip out features not present on earlier architectures.
@@ -539,11 +552,12 @@
 		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
 #endif
 
-		if (is_smp()) {
-			/*
-			 * Mark memory with the "shared" attribute
-			 * for SMP systems
-			 */
+		/*
+		 * If the initial page tables were created with the S bit
+		 * set, then we need to do the same here for the same
+		 * reasons given in early_cachepolicy().
+		 */
+		if (initial_pmd_value & PMD_SECT_S) {
 			user_pgprot |= L_PTE_SHARED;
 			kern_pgprot |= L_PTE_SHARED;
 			vecs_pgprot |= L_PTE_SHARED;
@@ -1061,74 +1075,47 @@
 void __init sanity_check_meminfo(void)
 {
 	phys_addr_t memblock_limit = 0;
-	int i, j, highmem = 0;
+	int highmem = 0;
 	phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1;
+	struct memblock_region *reg;
 
-	for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
-		struct membank *bank = &meminfo.bank[j];
-		phys_addr_t size_limit;
+	for_each_memblock(memory, reg) {
+		phys_addr_t block_start = reg->base;
+		phys_addr_t block_end = reg->base + reg->size;
+		phys_addr_t size_limit = reg->size;
 
-		*bank = meminfo.bank[i];
-		size_limit = bank->size;
-
-		if (bank->start >= vmalloc_limit)
+		if (reg->base >= vmalloc_limit)
 			highmem = 1;
 		else
-			size_limit = vmalloc_limit - bank->start;
+			size_limit = vmalloc_limit - reg->base;
 
-		bank->highmem = highmem;
 
-#ifdef CONFIG_HIGHMEM
-		/*
-		 * Split those memory banks which are partially overlapping
-		 * the vmalloc area greatly simplifying things later.
-		 */
-		if (!highmem && bank->size > size_limit) {
-			if (meminfo.nr_banks >= NR_BANKS) {
-				printk(KERN_CRIT "NR_BANKS too low, "
-						 "ignoring high memory\n");
-			} else {
-				memmove(bank + 1, bank,
-					(meminfo.nr_banks - i) * sizeof(*bank));
-				meminfo.nr_banks++;
-				i++;
-				bank[1].size -= size_limit;
-				bank[1].start = vmalloc_limit;
-				bank[1].highmem = highmem = 1;
-				j++;
+		if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) {
+
+			if (highmem) {
+				pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n",
+					&block_start, &block_end);
+				memblock_remove(reg->base, reg->size);
+				continue;
 			}
-			bank->size = size_limit;
-		}
-#else
-		/*
-		 * Highmem banks not allowed with !CONFIG_HIGHMEM.
-		 */
-		if (highmem) {
-			printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
-			       "(!CONFIG_HIGHMEM).\n",
-			       (unsigned long long)bank->start,
-			       (unsigned long long)bank->start + bank->size - 1);
-			continue;
+
+			if (reg->size > size_limit) {
+				phys_addr_t overlap_size = reg->size - size_limit;
+
+				pr_notice("Truncating RAM at %pa-%pa to -%pa",
+				      &block_start, &block_end, &vmalloc_limit);
+				memblock_remove(vmalloc_limit, overlap_size);
+				block_end = vmalloc_limit;
+			}
 		}
 
-		/*
-		 * Check whether this memory bank would partially overlap
-		 * the vmalloc area.
-		 */
-		if (bank->size > size_limit) {
-			printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx "
-			       "to -%.8llx (vmalloc region overlap).\n",
-			       (unsigned long long)bank->start,
-			       (unsigned long long)bank->start + bank->size - 1,
-			       (unsigned long long)bank->start + size_limit - 1);
-			bank->size = size_limit;
-		}
-#endif
-		if (!bank->highmem) {
-			phys_addr_t bank_end = bank->start + bank->size;
-
-			if (bank_end > arm_lowmem_limit)
-				arm_lowmem_limit = bank_end;
+		if (!highmem) {
+			if (block_end > arm_lowmem_limit) {
+				if (reg->size > size_limit)
+					arm_lowmem_limit = vmalloc_limit;
+				else
+					arm_lowmem_limit = block_end;
+			}
 
 			/*
 			 * Find the first non-section-aligned page, and point
@@ -1144,35 +1131,15 @@
 			 * occurs before any free memory is mapped.
 			 */
 			if (!memblock_limit) {
-				if (!IS_ALIGNED(bank->start, SECTION_SIZE))
-					memblock_limit = bank->start;
-				else if (!IS_ALIGNED(bank_end, SECTION_SIZE))
-					memblock_limit = bank_end;
+				if (!IS_ALIGNED(block_start, SECTION_SIZE))
+					memblock_limit = block_start;
+				else if (!IS_ALIGNED(block_end, SECTION_SIZE))
+					memblock_limit = arm_lowmem_limit;
 			}
-		}
-		j++;
-	}
-#ifdef CONFIG_HIGHMEM
-	if (highmem) {
-		const char *reason = NULL;
 
-		if (cache_is_vipt_aliasing()) {
-			/*
-			 * Interactions between kmap and other mappings
-			 * make highmem support with aliasing VIPT caches
-			 * rather difficult.
-			 */
-			reason = "with VIPT aliasing cache";
-		}
-		if (reason) {
-			printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
-				reason);
-			while (j > 0 && meminfo.bank[j - 1].highmem)
-				j--;
 		}
 	}
-#endif
-	meminfo.nr_banks = j;
+
 	high_memory = __va(arm_lowmem_limit - 1) + 1;
 
 	/*
@@ -1359,6 +1326,9 @@
 #ifdef CONFIG_HIGHMEM
 	pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),
 		PKMAP_BASE, _PAGE_KERNEL_TABLE);
+
+	fixmap_page_table = early_pte_alloc(pmd_off_k(FIXADDR_START),
+		FIXADDR_START, _PAGE_KERNEL_TABLE);
 #endif
 }
 
@@ -1461,7 +1431,7 @@
 	 * just complicate the code.
 	 */
 	flush_cache_louis();
-	dsb();
+	dsb(ishst);
 	isb();
 
 	/* remap level 1 table */
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 55764a7e..da1874f 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -88,30 +88,35 @@
 void __init sanity_check_meminfo_mpu(void)
 {
 	int i;
-	struct membank *bank = meminfo.bank;
 	phys_addr_t phys_offset = PHYS_OFFSET;
 	phys_addr_t aligned_region_size, specified_mem_size, rounded_mem_size;
+	struct memblock_region *reg;
+	bool first = true;
+	phys_addr_t mem_start;
+	phys_addr_t mem_end;
 
-	/* Initially only use memory continuous from PHYS_OFFSET */
-	if (bank_phys_start(&bank[0]) != phys_offset)
-		panic("First memory bank must be contiguous from PHYS_OFFSET");
+	for_each_memblock(memory, reg) {
+		if (first) {
+			/*
+			 * Initially only use memory continuous from
+			 * PHYS_OFFSET */
+			if (reg->base != phys_offset)
+				panic("First memory bank must be contiguous from PHYS_OFFSET");
 
-	/* Banks have already been sorted by start address */
-	for (i = 1; i < meminfo.nr_banks; i++) {
-		if (bank[i].start <= bank_phys_end(&bank[0]) &&
-		    bank_phys_end(&bank[i]) > bank_phys_end(&bank[0])) {
-			bank[0].size = bank_phys_end(&bank[i]) - bank[0].start;
+			mem_start = reg->base;
+			mem_end = reg->base + reg->size;
+			specified_mem_size = reg->size;
+			first = false;
 		} else {
-			pr_notice("Ignoring RAM after 0x%.8lx. "
-			"First non-contiguous (ignored) bank start: 0x%.8lx\n",
-				(unsigned long)bank_phys_end(&bank[0]),
-				(unsigned long)bank_phys_start(&bank[i]));
-			break;
+			/*
+			 * memblock auto merges contiguous blocks, remove
+			 * all blocks afterwards
+			 */
+			pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n",
+				  &mem_start, &reg->base);
+			memblock_remove(reg->base, reg->size);
 		}
 	}
-	/* All contiguous banks are now merged in to the first bank */
-	meminfo.nr_banks = 1;
-	specified_mem_size = bank[0].size;
 
 	/*
 	 * MPU has curious alignment requirements: Size must be power of 2, and
@@ -128,23 +133,24 @@
 	 */
 	aligned_region_size = (phys_offset - 1) ^ (phys_offset);
 	/* Find the max power-of-two sized region that fits inside our bank */
-	rounded_mem_size = (1 <<  __fls(bank[0].size)) - 1;
+	rounded_mem_size = (1 <<  __fls(specified_mem_size)) - 1;
 
 	/* The actual region size is the smaller of the two */
 	aligned_region_size = aligned_region_size < rounded_mem_size
 				? aligned_region_size + 1
 				: rounded_mem_size + 1;
 
-	if (aligned_region_size != specified_mem_size)
-		pr_warn("Truncating memory from 0x%.8lx to 0x%.8lx (MPU region constraints)",
-				(unsigned long)specified_mem_size,
-				(unsigned long)aligned_region_size);
+	if (aligned_region_size != specified_mem_size) {
+		pr_warn("Truncating memory from %pa to %pa (MPU region constraints)",
+				&specified_mem_size, &aligned_region_size);
+		memblock_remove(mem_start + aligned_region_size,
+				specified_mem_size - aligned_round_size);
 
-	meminfo.bank[0].size = aligned_region_size;
-	pr_debug("MPU Region from 0x%.8lx size 0x%.8lx (end 0x%.8lx))\n",
-		(unsigned long)phys_offset,
-		(unsigned long)aligned_region_size,
-		(unsigned long)bank_phys_end(&bank[0]));
+		mem_end = mem_start + aligned_region_size;
+	}
+
+	pr_debug("MPU Region from %pa size %pa (end %pa))\n",
+		&phys_offset, &aligned_region_size, &mem_end);
 
 }
 
@@ -292,7 +298,7 @@
 {
 	phys_addr_t end;
 	sanity_check_meminfo_mpu();
-	end = bank_phys_end(&meminfo.bank[meminfo.nr_banks - 1]);
+	end = memblock_end_of_DRAM();
 	high_memory = __va(end - 1) + 1;
 }
 
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index 01a719e..22e3ad6 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -64,6 +64,14 @@
 	mov	pc, lr
 ENDPROC(cpu_v7_switch_mm)
 
+#ifdef __ARMEB__
+#define rl r3
+#define rh r2
+#else
+#define rl r2
+#define rh r3
+#endif
+
 /*
  * cpu_v7_set_pte_ext(ptep, pte)
  *
@@ -73,13 +81,13 @@
  */
 ENTRY(cpu_v7_set_pte_ext)
 #ifdef CONFIG_MMU
-	tst	r2, #L_PTE_VALID
+	tst	rl, #L_PTE_VALID
 	beq	1f
-	tst	r3, #1 << (57 - 32)		@ L_PTE_NONE
-	bicne	r2, #L_PTE_VALID
+	tst	rh, #1 << (57 - 32)		@ L_PTE_NONE
+	bicne	rl, #L_PTE_VALID
 	bne	1f
-	tst	r3, #1 << (55 - 32)		@ L_PTE_DIRTY
-	orreq	r2, #L_PTE_RDONLY
+	tst	rh, #1 << (55 - 32)		@ L_PTE_DIRTY
+	orreq	rl, #L_PTE_RDONLY
 1:	strd	r2, r3, [r0]
 	ALT_SMP(W(nop))
 	ALT_UP (mcr	p15, 0, r0, c7, c10, 1)		@ flush_pte
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 195731d..3db2c2f 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -169,9 +169,31 @@
 	globl_equ	cpu_pj4b_do_idle,  	cpu_v7_do_idle
 #endif
 	globl_equ	cpu_pj4b_dcache_clean_area,	cpu_v7_dcache_clean_area
-	globl_equ	cpu_pj4b_do_suspend,	cpu_v7_do_suspend
-	globl_equ	cpu_pj4b_do_resume,	cpu_v7_do_resume
-	globl_equ	cpu_pj4b_suspend_size,	cpu_v7_suspend_size
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ENTRY(cpu_pj4b_do_suspend)
+	stmfd	sp!, {r6 - r10}
+	mrc	p15, 1, r6, c15, c1, 0  @ save CP15 - extra features
+	mrc	p15, 1, r7, c15, c2, 0	@ save CP15 - Aux Func Modes Ctrl 0
+	mrc	p15, 1, r8, c15, c1, 2	@ save CP15 - Aux Debug Modes Ctrl 2
+	mrc	p15, 1, r9, c15, c1, 1  @ save CP15 - Aux Debug Modes Ctrl 1
+	mrc	p15, 0, r10, c9, c14, 0  @ save CP15 - PMC
+	stmia	r0!, {r6 - r10}
+	ldmfd	sp!, {r6 - r10}
+	b cpu_v7_do_suspend
+ENDPROC(cpu_pj4b_do_suspend)
+
+ENTRY(cpu_pj4b_do_resume)
+	ldmia	r0!, {r6 - r10}
+	mcr	p15, 1, r6, c15, c1, 0  @ save CP15 - extra features
+	mcr	p15, 1, r7, c15, c2, 0	@ save CP15 - Aux Func Modes Ctrl 0
+	mcr	p15, 1, r8, c15, c1, 2	@ save CP15 - Aux Debug Modes Ctrl 2
+	mcr	p15, 1, r9, c15, c1, 1  @ save CP15 - Aux Debug Modes Ctrl 1
+	mcr	p15, 0, r10, c9, c14, 0  @ save CP15 - PMC
+	b cpu_v7_do_resume
+ENDPROC(cpu_pj4b_do_resume)
+#endif
+.globl	cpu_pj4b_suspend_size
+.equ	cpu_pj4b_suspend_size, 4 * 14
 
 #endif
 
@@ -194,6 +216,7 @@
 __v7_ca7mp_setup:
 __v7_ca12mp_setup:
 __v7_ca15mp_setup:
+__v7_ca17mp_setup:
 	mov	r10, #0
 1:
 #ifdef CONFIG_SMP
@@ -505,6 +528,16 @@
 	.size	__v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info
 
 	/*
+	 * ARM Ltd. Cortex A17 processor.
+	 */
+	.type	__v7_ca17mp_proc_info, #object
+__v7_ca17mp_proc_info:
+	.long	0x410fc0e0
+	.long	0xff0ffff0
+	__v7_proc __v7_ca17mp_setup
+	.size	__v7_ca17mp_proc_info, . - __v7_ca17mp_proc_info
+
+	/*
 	 * Qualcomm Inc. Krait processors.
 	 */
 	.type	__krait_proc_info, #object
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index 0c93588..1ca37c7 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -123,6 +123,11 @@
 	mov	pc, lr
 ENDPROC(__v7m_setup)
 
+	.align 2
+__v7m_setup_stack:
+	.space	4 * 8				@ 8 registers
+__v7m_setup_stack_top:
+
 	define_processor_functions v7m, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1
 
 	.section ".rodata"
@@ -152,6 +157,3 @@
 	.long	nop_cache_fns		@ proc_info_list.cache
 	.size	__v7m_proc_info, . - __v7m_proc_info
 
-__v7m_setup_stack:
-	.space	4 * 8				@ 8 registers
-__v7m_setup_stack_top:
diff --git a/arch/arm/plat-samsung/s5p-sleep.S b/arch/arm/plat-samsung/s5p-sleep.S
index c500165..25c68ceb 100644
--- a/arch/arm/plat-samsung/s5p-sleep.S
+++ b/arch/arm/plat-samsung/s5p-sleep.S
@@ -22,7 +22,6 @@
 */
 
 #include <linux/linkage.h>
-#include <asm/asm-offsets.h>
 
 	.data
 	.align
diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
index f0759e7..fe6ca57 100644
--- a/arch/arm/vfp/entry.S
+++ b/arch/arm/vfp/entry.S
@@ -22,11 +22,10 @@
 @  r9  = normal "successful" return address
 @  r10 = this threads thread_info structure
 @  lr  = unrecognised instruction return address
-@  IRQs disabled.
+@  IRQs enabled.
 @
 ENTRY(do_vfp)
 	inc_preempt_count r10, r4
-	enable_irq
  	ldr	r4, .LCvfp
 	ldr	r11, [r10, #TI_CPU]	@ CPU number
 	add	r10, r10, #TI_VFPSTATE	@ r10 = workspace
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index 63b5eff..fdd7e1b 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -47,6 +47,7 @@
 enum amba_vendor {
 	AMBA_VENDOR_ARM = 0x41,
 	AMBA_VENDOR_ST = 0x80,
+	AMBA_VENDOR_QCOM = 0x51,
 };
 
 extern struct bus_type amba_bustype;
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index f73cabf..38bbf95 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -320,6 +320,8 @@
 extern void hibernation_set_ops(const struct platform_hibernation_ops *ops);
 extern int hibernate(void);
 extern bool system_entering_hibernation(void);
+asmlinkage int swsusp_save(void);
+extern struct pbe *restore_pblist;
 #else /* CONFIG_HIBERNATION */
 static inline void register_nosave_region(unsigned long b, unsigned long e) {}
 static inline void register_nosave_region_late(unsigned long b, unsigned long e) {}
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index edff2b9..c52f827 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -32,6 +32,7 @@
 struct mm_struct;
 struct inode;
 struct notifier_block;
+struct page;
 
 #define UPROBE_HANDLER_REMOVE		1
 #define UPROBE_HANDLER_MASK		1
@@ -127,6 +128,8 @@
 extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 extern bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
+extern void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+					 void *src, unsigned long len);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 04709b6..4968213 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1296,14 +1296,8 @@
 	if (unlikely(!xol_vaddr))
 		return 0;
 
-	/* Initialize the slot */
-	copy_to_page(area->page, xol_vaddr,
-			&uprobe->arch.ixol, sizeof(uprobe->arch.ixol));
-	/*
-	 * We probably need flush_icache_user_range() but it needs vma.
-	 * This should work on supported architectures too.
-	 */
-	flush_dcache_page(area->page);
+	arch_uprobe_copy_ixol(area->page, xol_vaddr,
+			      &uprobe->arch.ixol, sizeof(uprobe->arch.ixol));
 
 	return xol_vaddr;
 }
@@ -1346,6 +1340,21 @@
 	}
 }
 
+void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+				  void *src, unsigned long len)
+{
+	/* Initialize the slot */
+	copy_to_page(page, vaddr, src, len);
+
+	/*
+	 * We probably need flush_icache_user_range() but it needs vma.
+	 * This should work on most of architectures by default. If
+	 * architecture needs to do something different it can define
+	 * its own version of the function.
+	 */
+	flush_dcache_page(page);
+}
+
 /**
  * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs
  * @regs: Reflects the saved state of the task after it has hit a breakpoint