Merge branch 'socfpga/hw' into next/soc

From Dinh Nguyen, this is a series of patches introducing support for
socfpga hardware (Altera Cyclone5). It also includes a cleanup that
moves some of the ARMv7 cache maintenance functions to a common location,
since three other platforms already implemented it separately.

* socfpga/hw:
  arm: socfpga: Add SMP support for actual socfpga harware
  arm: Add v7_invalidate_l1 to cache-v7.S
  arm: socfpga: Add entries to enable make dtbs socfpga
  arm: socfpga: Add new device tree source for actual socfpga HW

Trivial conflict in arch/arm/mach-tegra/headsmp.S.

Signed-off-by: Olof Johansson <olof@lixom.net>
diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-system.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-system.txt
index 07c65e3..f4d04a0 100644
--- a/Documentation/devicetree/bindings/arm/altera/socfpga-system.txt
+++ b/Documentation/devicetree/bindings/arm/altera/socfpga-system.txt
@@ -3,9 +3,11 @@
 Required properties:
 - compatible : "altr,sys-mgr"
 - reg : Should contain 1 register ranges(address and length)
+- cpu1-start-addr : CPU1 start address in hex.
 
 Example:
 	 sysmgr@ffd08000 {
 		compatible = "altr,sys-mgr";
 		reg = <0xffd08000 0x1000>;
+		cpu1-start-addr = <0xffd080c4>;
 	};
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 2e7a7e2..042f211 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -125,6 +125,8 @@
 	r8a7740-armadillo800eva.dtb \
 	sh73a0-kzm9g.dtb \
 	sh7372-mackerel.dtb
+dtb-$(CONFIG_ARCH_SOCFPGA) += socfpga_cyclone5.dtb \
+	socfpga_vt.dtb
 dtb-$(CONFIG_ARCH_SPEAR13XX) += spear1310-evb.dtb \
 	spear1340-evb.dtb
 dtb-$(CONFIG_ARCH_SPEAR3XX)+= spear300-evb.dtb \
diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi
index 19aec42..936d230 100644
--- a/arch/arm/boot/dts/socfpga.dtsi
+++ b/arch/arm/boot/dts/socfpga.dtsi
@@ -25,6 +25,10 @@
 		ethernet0 = &gmac0;
 		serial0 = &uart0;
 		serial1 = &uart1;
+		timer0 = &timer0;
+		timer1 = &timer1;
+		timer2 = &timer2;
+		timer3 = &timer3;
 	};
 
 	cpus {
@@ -98,47 +102,41 @@
 			interrupts = <1 13 0xf04>;
 		};
 
-		timer0: timer@ffc08000 {
+		timer0: timer0@ffc08000 {
 			compatible = "snps,dw-apb-timer-sp";
 			interrupts = <0 167 4>;
-			clock-frequency = <200000000>;
 			reg = <0xffc08000 0x1000>;
 		};
 
-		timer1: timer@ffc09000 {
+		timer1: timer1@ffc09000 {
 			compatible = "snps,dw-apb-timer-sp";
 			interrupts = <0 168 4>;
-			clock-frequency = <200000000>;
 			reg = <0xffc09000 0x1000>;
 		};
 
-		timer2: timer@ffd00000 {
+		timer2: timer2@ffd00000 {
 			compatible = "snps,dw-apb-timer-osc";
 			interrupts = <0 169 4>;
-			clock-frequency = <200000000>;
 			reg = <0xffd00000 0x1000>;
 		};
 
-		timer3: timer@ffd01000 {
+		timer3: timer3@ffd01000 {
 			compatible = "snps,dw-apb-timer-osc";
 			interrupts = <0 170 4>;
-			clock-frequency = <200000000>;
 			reg = <0xffd01000 0x1000>;
 		};
 
-		uart0: uart@ffc02000 {
+		uart0: serial0@ffc02000 {
 			compatible = "snps,dw-apb-uart";
 			reg = <0xffc02000 0x1000>;
-			clock-frequency = <7372800>;
 			interrupts = <0 162 4>;
 			reg-shift = <2>;
 			reg-io-width = <4>;
 		};
 
-		uart1: uart@ffc03000 {
+		uart1: serial1@ffc03000 {
 			compatible = "snps,dw-apb-uart";
 			reg = <0xffc03000 0x1000>;
-			clock-frequency = <7372800>;
 			interrupts = <0 163 4>;
 			reg-shift = <2>;
 			reg-io-width = <4>;
diff --git a/arch/arm/boot/dts/socfpga_cyclone5.dts b/arch/arm/boot/dts/socfpga_cyclone5.dts
index ab7e4a9..3ae8a83 100644
--- a/arch/arm/boot/dts/socfpga_cyclone5.dts
+++ b/arch/arm/boot/dts/socfpga_cyclone5.dts
@@ -20,7 +20,7 @@
 
 / {
 	model = "Altera SOCFPGA Cyclone V";
-	compatible = "altr,socfpga-cyclone5";
+	compatible = "altr,socfpga-cyclone5", "altr,socfpga";
 
 	chosen {
 		bootargs = "console=ttyS0,57600";
@@ -29,6 +29,36 @@
 	memory {
 		name = "memory";
 		device_type = "memory";
-		reg = <0x0 0x10000000>; /* 256MB */
+		reg = <0x0 0x40000000>; /* 1GB */
+	};
+
+	soc {
+		timer0@ffc08000 {
+			clock-frequency = <100000000>;
+		};
+
+		timer1@ffc09000 {
+			clock-frequency = <100000000>;
+		};
+
+		timer2@ffd00000 {
+			clock-frequency = <25000000>;
+		};
+
+		timer3@ffd01000 {
+			clock-frequency = <25000000>;
+		};
+
+		serial0@ffc02000 {
+			clock-frequency = <100000000>;
+		};
+
+		serial1@ffc03000 {
+			clock-frequency = <100000000>;
+		};
+
+		sysmgr@ffd08000 {
+			cpu1-start-addr = <0xffd080c4>;
+		};
 	};
 };
diff --git a/arch/arm/boot/dts/socfpga_vt.dts b/arch/arm/boot/dts/socfpga_vt.dts
new file mode 100644
index 0000000..1036eba
--- /dev/null
+++ b/arch/arm/boot/dts/socfpga_vt.dts
@@ -0,0 +1,64 @@
+/*
+ *  Copyright (C) 2013 Altera Corporation <www.altera.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/dts-v1/;
+/include/ "socfpga.dtsi"	/* shared SoC description; this file adds the VT-specific values */
+
+/ {
+	model = "Altera SOCFPGA VT";
+	compatible = "altr,socfpga-vt", "altr,socfpga";	/* specific target first, generic SoC match second */
+
+	chosen {
+		bootargs = "console=ttyS0,57600";
+	};
+
+	memory {
+		name = "memory";
+		device_type = "memory";
+		reg = <0x0 0x40000000>; /* 1 GB */
+	};
+
+	soc {	/* VT-specific properties, keyed by the node names used in socfpga.dtsi */
+		timer0@ffc08000 {
+			clock-frequency = <7000000>;	/* 7 MHz */
+		};
+
+		timer1@ffc09000 {
+			clock-frequency = <7000000>;	/* 7 MHz */
+		};
+
+		timer2@ffd00000 {
+			clock-frequency = <7000000>;	/* 7 MHz */
+		};
+
+		timer3@ffd01000 {
+			clock-frequency = <7000000>;	/* 7 MHz */
+		};
+
+		serial0@ffc02000 {
+			clock-frequency = <7372800>;	/* 7.3728 MHz */
+		};
+
+		serial1@ffc03000 {
+			clock-frequency = <7372800>;	/* 7.3728 MHz */
+		};
+
+		sysmgr@ffd08000 {
+			cpu1-start-addr = <0xffd08010>;	/* CPU1 start address, see the altr,sys-mgr binding */
+		};
+	};
+};
diff --git a/arch/arm/mach-imx/headsmp.S b/arch/arm/mach-imx/headsmp.S
index 7e49deb..921fc15 100644
--- a/arch/arm/mach-imx/headsmp.S
+++ b/arch/arm/mach-imx/headsmp.S
@@ -17,53 +17,6 @@
 
 	.section ".text.head", "ax"
 
-/*
- * The secondary kernel init calls v7_flush_dcache_all before it enables
- * the L1; however, the L1 comes out of reset in an undefined state, so
- * the clean + invalidate performed by v7_flush_dcache_all causes a bunch
- * of cache lines with uninitialized data and uninitialized tags to get
- * written out to memory, which does really unpleasant things to the main
- * processor.  We fix this by performing an invalidate, rather than a
- * clean + invalidate, before jumping into the kernel.
- *
- * This funciton is cloned from arch/arm/mach-tegra/headsmp.S, and needs
- * to be called for both secondary cores startup and primary core resume
- * procedures.  Ideally, it should be moved into arch/arm/mm/cache-v7.S.
- */
-ENTRY(v7_invalidate_l1)
-	mov	r0, #0
-	mcr	p15, 0, r0, c7, c5, 0	@ invalidate I cache
-	mcr	p15, 2, r0, c0, c0, 0
-	mrc	p15, 1, r0, c0, c0, 0
-
-	ldr	r1, =0x7fff
-	and	r2, r1, r0, lsr #13
-
-	ldr	r1, =0x3ff
-
-	and	r3, r1, r0, lsr #3	@ NumWays - 1
-	add	r2, r2, #1		@ NumSets
-
-	and	r0, r0, #0x7
-	add	r0, r0, #4	@ SetShift
-
-	clz	r1, r3		@ WayShift
-	add	r4, r3, #1	@ NumWays
-1:	sub	r2, r2, #1	@ NumSets--
-	mov	r3, r4		@ Temp = NumWays
-2:	subs	r3, r3, #1	@ Temp--
-	mov	r5, r3, lsl r1
-	mov	r6, r2, lsl r0
-	orr	r5, r5, r6	@ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
-	mcr	p15, 0, r5, c7, c6, 2
-	bgt	2b
-	cmp	r2, #0
-	bgt	1b
-	dsb
-	isb
-	mov	pc, lr
-ENDPROC(v7_invalidate_l1)
-
 #ifdef CONFIG_SMP
 ENTRY(v7_secondary_startup)
 	bl	v7_invalidate_l1
diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S
index b202c12..96001fd 100644
--- a/arch/arm/mach-shmobile/headsmp.S
+++ b/arch/arm/mach-shmobile/headsmp.S
@@ -16,54 +16,6 @@
 
 	__CPUINIT
 
-/* Cache invalidation nicked from arch/arm/mach-imx/head-v7.S, thanks!
- *
- * The secondary kernel init calls v7_flush_dcache_all before it enables
- * the L1; however, the L1 comes out of reset in an undefined state, so
- * the clean + invalidate performed by v7_flush_dcache_all causes a bunch
- * of cache lines with uninitialized data and uninitialized tags to get
- * written out to memory, which does really unpleasant things to the main
- * processor.  We fix this by performing an invalidate, rather than a
- * clean + invalidate, before jumping into the kernel.
- *
- * This funciton is cloned from arch/arm/mach-tegra/headsmp.S, and needs
- * to be called for both secondary cores startup and primary core resume
- * procedures.  Ideally, it should be moved into arch/arm/mm/cache-v7.S.
- */
-ENTRY(v7_invalidate_l1)
-	mov	r0, #0
-	mcr	p15, 0, r0, c7, c5, 0	@ invalidate I cache
-	mcr	p15, 2, r0, c0, c0, 0
-	mrc	p15, 1, r0, c0, c0, 0
-
-	ldr	r1, =0x7fff
-	and	r2, r1, r0, lsr #13
-
-	ldr	r1, =0x3ff
-
-	and	r3, r1, r0, lsr #3	@ NumWays - 1
-	add	r2, r2, #1		@ NumSets
-
-	and	r0, r0, #0x7
-	add	r0, r0, #4	@ SetShift
-
-	clz	r1, r3		@ WayShift
-	add	r4, r3, #1	@ NumWays
-1:	sub	r2, r2, #1	@ NumSets--
-	mov	r3, r4		@ Temp = NumWays
-2:	subs	r3, r3, #1	@ Temp--
-	mov	r5, r3, lsl r1
-	mov	r6, r2, lsl r0
-	orr	r5, r5, r6	@ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
-	mcr	p15, 0, r5, c7, c6, 2
-	bgt	2b
-	cmp	r2, #0
-	bgt	1b
-	dsb
-	isb
-	mov	pc, lr
-ENDPROC(v7_invalidate_l1)
-
 ENTRY(shmobile_invalidate_start)
 	bl	v7_invalidate_l1
 	b	secondary_startup
diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h
index 9941caa..315edff 100644
--- a/arch/arm/mach-socfpga/core.h
+++ b/arch/arm/mach-socfpga/core.h
@@ -20,7 +20,7 @@
 #ifndef __MACH_CORE_H
 #define __MACH_CORE_H
 
-extern void secondary_startup(void);
+extern void socfpga_secondary_startup(void);
 extern void __iomem *socfpga_scu_base_addr;
 
 extern void socfpga_init_clocks(void);
@@ -29,6 +29,8 @@
 extern struct smp_operations socfpga_smp_ops;
 extern char secondary_trampoline, secondary_trampoline_end;
 
+extern unsigned long cpu1start_addr;
+
 #define SOCFPGA_SCU_VIRT_BASE   0xfffec000
 
 #endif
diff --git a/arch/arm/mach-socfpga/headsmp.S b/arch/arm/mach-socfpga/headsmp.S
index f09b128..9004bfb 100644
--- a/arch/arm/mach-socfpga/headsmp.S
+++ b/arch/arm/mach-socfpga/headsmp.S
@@ -13,13 +13,21 @@
 	__CPUINIT
 	.arch	armv7-a
 
-#define CPU1_START_ADDR 	        0xffd08010
-
 ENTRY(secondary_trampoline)
-	movw	r0, #:lower16:CPU1_START_ADDR
-	movt  r0, #:upper16:CPU1_START_ADDR
+	movw	r2, #:lower16:cpu1start_addr
+	movt  r2, #:upper16:cpu1start_addr
 
+	/* The socfpga VT cannot handle a 0xC0000000 page offset when loading
+		cpu1start_addr, so clear address bit 30. Tested on HW and VT. */
+	bic	r2, r2, #0x40000000
+
+	ldr	r0, [r2]
 	ldr	r1, [r0]
 	bx	r1
 
 ENTRY(secondary_trampoline_end)
+
+ENTRY(socfpga_secondary_startup)
+       bl      v7_invalidate_l1        @ invalidate (not clean) the L1 D-cache first
+       b       secondary_startup       @ then continue at secondary_startup
+ENDPROC(socfpga_secondary_startup)
diff --git a/arch/arm/mach-socfpga/platsmp.c b/arch/arm/mach-socfpga/platsmp.c
index 4e9e69d..84c60fa 100644
--- a/arch/arm/mach-socfpga/platsmp.c
+++ b/arch/arm/mach-socfpga/platsmp.c
@@ -47,16 +47,19 @@
 {
 	int trampoline_size = &secondary_trampoline_end - &secondary_trampoline;
 
-	memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size);
+	if (cpu1start_addr) {
+		memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size);
 
-	__raw_writel(virt_to_phys(secondary_startup), (sys_manager_base_addr+0x10));
+		__raw_writel(virt_to_phys(socfpga_secondary_startup),
+			(sys_manager_base_addr + (cpu1start_addr & 0x000000ff)));
 
-	flush_cache_all();
-	smp_wmb();
-	outer_clean_range(0, trampoline_size);
+		flush_cache_all();
+		smp_wmb();
+		outer_clean_range(0, trampoline_size);
 
-	/* This will release CPU #1 out of reset.*/
-	__raw_writel(0, rst_manager_base_addr + 0x10);
+		/* This will release CPU #1 out of reset.*/
+		__raw_writel(0, rst_manager_base_addr + 0x10);
+	}
 
 	return 0;
 }
diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c
index 27d6846..1042c02 100644
--- a/arch/arm/mach-socfpga/socfpga.c
+++ b/arch/arm/mach-socfpga/socfpga.c
@@ -29,6 +29,7 @@
 void __iomem *socfpga_scu_base_addr = ((void __iomem *)(SOCFPGA_SCU_VIRT_BASE));
 void __iomem *sys_manager_base_addr;
 void __iomem *rst_manager_base_addr;
+unsigned long cpu1start_addr;
 
 static struct map_desc scu_io_desc __initdata = {
 	.virtual	= SOCFPGA_SCU_VIRT_BASE,
@@ -67,6 +68,11 @@
 	struct device_node *np;
 
 	np = of_find_compatible_node(NULL, NULL, "altr,sys-mgr");
+
+	if (of_property_read_u32(np, "cpu1-start-addr",
+			(u32 *) &cpu1start_addr))
+		pr_err("SMP: Need cpu1-start-addr in device tree.\n");
+
 	sys_manager_base_addr = of_iomap(np, 0);
 
 	np = of_find_compatible_node(NULL, NULL, "altr,rst-mgr");
@@ -93,7 +99,6 @@
 
 static const char *altera_dt_match[] = {
 	"altr,socfpga",
-	"altr,socfpga-cyclone5",
 	NULL
 };
 
diff --git a/arch/arm/mach-tegra/headsmp.S b/arch/arm/mach-tegra/headsmp.S
index b283481..fd473f2 100644
--- a/arch/arm/mach-tegra/headsmp.S
+++ b/arch/arm/mach-tegra/headsmp.S
@@ -5,49 +5,6 @@
 
         .section ".text.head", "ax"
 
-/*
- * Tegra specific entry point for secondary CPUs.
- *   The secondary kernel init calls v7_flush_dcache_all before it enables
- *   the L1; however, the L1 comes out of reset in an undefined state, so
- *   the clean + invalidate performed by v7_flush_dcache_all causes a bunch
- *   of cache lines with uninitialized data and uninitialized tags to get
- *   written out to memory, which does really unpleasant things to the main
- *   processor.  We fix this by performing an invalidate, rather than a
- *   clean + invalidate, before jumping into the kernel.
- */
-ENTRY(v7_invalidate_l1)
-        mov     r0, #0
-        mcr     p15, 2, r0, c0, c0, 0
-        mrc     p15, 1, r0, c0, c0, 0
-
-        ldr     r1, =0x7fff
-        and     r2, r1, r0, lsr #13
-
-        ldr     r1, =0x3ff
-
-        and     r3, r1, r0, lsr #3  @ NumWays - 1
-        add     r2, r2, #1          @ NumSets
-
-        and     r0, r0, #0x7
-        add     r0, r0, #4          @ SetShift
-
-        clz     r1, r3              @ WayShift
-        add     r4, r3, #1          @ NumWays
-1:      sub     r2, r2, #1          @ NumSets--
-        mov     r3, r4              @ Temp = NumWays
-2:      subs    r3, r3, #1          @ Temp--
-        mov     r5, r3, lsl r1
-        mov     r6, r2, lsl r0
-        orr     r5, r5, r6          @ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
-        mcr     p15, 0, r5, c7, c6, 2
-        bgt     2b
-        cmp     r2, #0
-        bgt     1b
-        dsb
-        isb
-        mov     pc, lr
-ENDPROC(v7_invalidate_l1)
-
 ENTRY(tegra_secondary_startup)
         bl      v7_invalidate_l1
 	/* Enable coresight */
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 7539ec2..15451ee 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -19,6 +19,52 @@
 #include "proc-macros.S"
 
 /*
+ * The secondary kernel init calls v7_flush_dcache_all before it enables
+ * the L1; however, the L1 comes out of reset in an undefined state, so
+ * the clean + invalidate performed by v7_flush_dcache_all causes a bunch
+ * of cache lines with uninitialized data and uninitialized tags to get
+ * written out to memory, which does really unpleasant things to the main
+ * processor.  We fix this by performing an invalidate, rather than a
+ * clean + invalidate, before jumping into the kernel.
+ *
+ * This function is cloned from arch/arm/mach-tegra/headsmp.S, and needs
+ * to be called for both secondary cores startup and primary core resume
+ * procedures.  It clobbers r0-r6 and the condition flags.
+ */
+ENTRY(v7_invalidate_l1)
+       mov     r0, #0
+       mcr     p15, 2, r0, c0, c0, 0   @ CSSELR = 0: select the L1 data/unified cache
+       mrc     p15, 1, r0, c0, c0, 0   @ r0 = CCSIDR of the selected cache
+
+       ldr     r1, =0x7fff
+       and     r2, r1, r0, lsr #13     @ NumSets - 1 (CCSIDR bits [27:13])
+
+       ldr     r1, =0x3ff
+
+       and     r3, r1, r0, lsr #3      @ NumWays - 1
+       add     r2, r2, #1              @ NumSets
+
+       and     r0, r0, #0x7            @ CCSIDR LineSize field (bits [2:0])
+       add     r0, r0, #4      @ SetShift
+
+       clz     r1, r3          @ WayShift
+       add     r4, r3, #1      @ NumWays
+1:     sub     r2, r2, #1      @ NumSets--
+       mov     r3, r4          @ Temp = NumWays
+2:     subs    r3, r3, #1      @ Temp--
+       mov     r5, r3, lsl r1  @ way index shifted into place
+       mov     r6, r2, lsl r0  @ set index shifted into place
+       orr     r5, r5, r6      @ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
+       mcr     p15, 0, r5, c7, c6, 2   @ DCISW: invalidate D-cache line by set/way
+       bgt     2b              @ flags are still those of the subs: loop while Temp > 0
+       cmp     r2, #0
+       bgt     1b              @ loop while sets remain
+       dsb                     @ barrier after the invalidate operations
+       isb
+       mov     pc, lr          @ return to caller
+ENDPROC(v7_invalidate_l1)
+
+/*
  *	v7_flush_icache_all()
  *
  *	Flush the whole I-cache.