Merge branch 'for-rmk' of git://linux-arm.org/linux-2.6 into devel
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 84a1e04..7b1f312 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -717,6 +717,9 @@
 		bl	__armv7_mmu_cache_flush
 		mov	r0, #0
 		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
+		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
+		mcr	p15, 0, r0, c7, c10, 4	@ DSB
+		mcr	p15, 0, r0, c7, c5, 4	@ ISB
 		mov	pc, r12
 
 __arm6_mmu_cache_off:
@@ -778,12 +781,13 @@
 __armv7_mmu_cache_flush:
 		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
 		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
-		beq	hierarchical
 		mov	r10, #0
+		beq	hierarchical
 		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
 		b	iflush
 hierarchical:
-		stmfd	sp!, {r0-r5, r7, r9-r11}
+		mcr	p15, 0, r10, c7, c10, 5	@ DMB
+		stmfd	sp!, {r0-r5, r7, r9, r11}
 		mrc	p15, 1, r0, c0, c0, 1	@ read clidr
 		ands	r3, r0, #0x7000000	@ extract loc from clidr
 		mov	r3, r3, lsr #23		@ left align loc bit field
@@ -820,12 +824,14 @@
 		cmp	r3, r10
 		bgt	loop1
 finished:
+		ldmfd	sp!, {r0-r5, r7, r9, r11}
 		mov	r10, #0			@ swith back to cache level 0
 		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
-		ldmfd	sp!, {r0-r5, r7, r9-r11}
 iflush:
+		mcr	p15, 0, r10, c7, c10, 4	@ DSB
 		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
-		mcr	p15, 0, r10, c7, c10, 4	@ drain WB
+		mcr	p15, 0, r10, c7, c10, 4	@ DSB
+		mcr	p15, 0, r10, c7, c5, 4	@ ISB
 		mov	pc, lr
 
 __armv5tej_mmu_cache_flush:
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index de6c59f..85a2514 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -15,6 +15,7 @@
 
 #include <asm/glue.h>
 #include <asm/shmparam.h>
+#include <asm/cachetype.h>
 
 #define CACHE_COLOUR(vaddr)	((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT)
 
@@ -296,16 +297,6 @@
 #endif
 
 /*
- * flush_cache_vmap() is used when creating mappings (eg, via vmap,
- * vmalloc, ioremap etc) in kernel space for pages.  Since the
- * direct-mappings of these pages may contain cached data, we need
- * to do a full cache flush to ensure that writebacks don't corrupt
- * data placed into these pages via the new mappings.
- */
-#define flush_cache_vmap(start, end)		flush_cache_all()
-#define flush_cache_vunmap(start, end)		flush_cache_all()
-
-/*
  * Copy user data from/to a page which is mapped into a different
  * processes address space.  Really, we want to allow our "user
  * space" model to handle this.
@@ -444,4 +435,29 @@
 	dmac_inv_range(start, start + size);
 }
 
+/*
+ * flush_cache_vmap() is used when creating mappings (eg, via vmap,
+ * vmalloc, ioremap etc) in kernel space for pages.  On non-VIPT
+ * caches, since the direct-mappings of these pages may contain cached
+ * data, we need to do a full cache flush to ensure that writebacks
+ * don't corrupt data placed into these pages via the new mappings.
+ */
+static inline void flush_cache_vmap(unsigned long start, unsigned long end)
+{
+	if (!cache_is_vipt_nonaliasing())
+		flush_cache_all();
+	else
+		/*
+		 * set_pte_at() called from vmap_pte_range() does not
+		 * have a DSB after cleaning the cache line.
+		 */
+		dsb();
+}
+
+static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
+{
+	if (!cache_is_vipt_nonaliasing())
+		flush_cache_all();
+}
+
 #endif
diff --git a/arch/arm/include/asm/hwcap.h b/arch/arm/include/asm/hwcap.h
index 81f4c89..bda489f 100644
--- a/arch/arm/include/asm/hwcap.h
+++ b/arch/arm/include/asm/hwcap.h
@@ -16,6 +16,7 @@
 #define HWCAP_IWMMXT	512
 #define HWCAP_CRUNCH	1024
 #define HWCAP_THUMBEE	2048
+#define HWCAP_NEON	4096
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 /*
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 1f1eecc..d4dae3e 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -772,6 +772,8 @@
 	"java",
 	"iwmmxt",
 	"crunch",
+	"thumbee",
+	"neon",
 	NULL
 };
 
diff --git a/arch/arm/kernel/thumbee.c b/arch/arm/kernel/thumbee.c
index df3f6b7..9cb7aac 100644
--- a/arch/arm/kernel/thumbee.c
+++ b/arch/arm/kernel/thumbee.c
@@ -25,7 +25,7 @@
 /*
  * Access to the ThumbEE Handler Base register
  */
-static inline unsigned long teehbr_read()
+static inline unsigned long teehbr_read(void)
 {
 	unsigned long v;
 	asm("mrc	p14, 6, %0, c1, c0, 0\n" : "=r" (v));
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index d19c2be..be93ff0 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -26,6 +26,7 @@
  *	- mm    - mm_struct describing address space
  */
 ENTRY(v7_flush_dcache_all)
+	dmb					@ ensure ordering with previous memory accesses
 	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
 	ands	r3, r0, #0x7000000		@ extract loc from clidr
 	mov	r3, r3, lsr #23			@ left align loc bit field
@@ -64,6 +65,7 @@
 finished:
 	mov	r10, #0				@ swith back to cache level 0
 	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
+	dsb
 	isb
 	mov	pc, lr
 ENDPROC(v7_flush_dcache_all)
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 294943b..f0cc599 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -71,6 +71,8 @@
  *	IRQs are already disabled.
  */
 ENTRY(cpu_v6_do_idle)
+	mov	r1, #0
+	mcr	p15, 0, r1, c7, c10, 4		@ DWB - WFI may enter a low-power mode
 	mcr	p15, 0, r1, c7, c0, 4		@ wait for interrupt
 	mov	pc, lr
 
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 4d3c0a7..d1ebec4 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -20,9 +20,17 @@
 
 #define TTB_C		(1 << 0)
 #define TTB_S		(1 << 1)
+#define TTB_RGN_NC	(0 << 3)
+#define TTB_RGN_OC_WBWA	(1 << 3)
 #define TTB_RGN_OC_WT	(2 << 3)
 #define TTB_RGN_OC_WB	(3 << 3)
 
+#ifndef CONFIG_SMP
+#define TTB_FLAGS	TTB_C|TTB_RGN_OC_WB		@ mark PTWs cacheable, outer WB
+#else
+#define TTB_FLAGS	TTB_C|TTB_S|TTB_RGN_OC_WBWA	@ mark PTWs cacheable and shared, outer WBWA
+#endif
+
 ENTRY(cpu_v7_proc_init)
 	mov	pc, lr
 ENDPROC(cpu_v7_proc_init)
@@ -55,6 +63,7 @@
  *	IRQs are already disabled.
  */
 ENTRY(cpu_v7_do_idle)
+	dsb					@ WFI may enter a low-power mode
 	wfi
 	mov	pc, lr
 ENDPROC(cpu_v7_do_idle)
@@ -85,7 +94,7 @@
 #ifdef CONFIG_MMU
 	mov	r2, #0
 	ldr	r1, [r1, #MM_CONTEXT_ID]	@ get mm->context.id
-	orr	r0, r0, #TTB_RGN_OC_WB		@ mark PTWs outer cacheable, WB
+	orr	r0, r0, #TTB_FLAGS
 	mcr	p15, 0, r2, c13, c0, 1		@ set reserved context ID
 	isb
 1:	mcr	p15, 0, r0, c2, c0, 0		@ set TTB 0
@@ -162,6 +171,11 @@
  *	- cache type register is implemented
  */
 __v7_setup:
+#ifdef CONFIG_SMP
+	mrc	p15, 0, r0, c1, c0, 1		@ Enable SMP/nAMP mode
+	orr	r0, r0, #(0x1 << 6)
+	mcr	p15, 0, r0, c1, c0, 1
+#endif
 	adr	r12, __v7_setup_stack		@ the local stack
 	stmia	r12, {r0-r5, r7, r9, r11, lr}
 	bl	v7_flush_dcache_all
@@ -174,8 +188,7 @@
 #ifdef CONFIG_MMU
 	mcr	p15, 0, r10, c8, c7, 0		@ invalidate I + D TLBs
 	mcr	p15, 0, r10, c2, c0, 2		@ TTB control register
-	orr	r4, r4, #TTB_RGN_OC_WB		@ mark PTWs outer cacheable, WB
-	mcr	p15, 0, r4, c2, c0, 0		@ load TTB0
+	orr	r4, r4, #TTB_FLAGS
 	mcr	p15, 0, r4, c2, c0, 1		@ load TTB1
 	mov	r10, #0x1f			@ domains 0, 1 = manager
 	mcr	p15, 0, r10, c3, c0, 0		@ load domain access register
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index a62dcf7..3c73aaf 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -101,9 +101,12 @@
 	VFPFSTMIA r4, r5		@ save the working registers
 	VFPFMRX	r5, FPSCR		@ current status
 	tst	r1, #FPEXC_EX		@ is there additional state to save?
-	VFPFMRX	r6, FPINST, NE		@ FPINST (only if FPEXC.EX is set)
-	tstne	r1, #FPEXC_FP2V		@ is there an FPINST2 to read?
-	VFPFMRX	r8, FPINST2, NE		@ FPINST2 if needed (and present)
+	beq	1f
+	VFPFMRX	r6, FPINST		@ FPINST (only if FPEXC.EX is set)
+	tst	r1, #FPEXC_FP2V		@ is there an FPINST2 to read?
+	beq	1f
+	VFPFMRX	r8, FPINST2		@ FPINST2 if needed (and present)
+1:
 	stmia	r4, {r1, r5, r6, r8}	@ save FPEXC, FPSCR, FPINST, FPINST2
 					@ and point r4 at the word at the
 					@ start of the register dump
@@ -117,9 +120,12 @@
 					@ FPEXC is in a safe state
 	ldmia	r10, {r1, r5, r6, r8}	@ load FPEXC, FPSCR, FPINST, FPINST2
 	tst	r1, #FPEXC_EX		@ is there additional state to restore?
-	VFPFMXR	FPINST, r6, NE		@ restore FPINST (only if FPEXC.EX is set)
-	tstne	r1, #FPEXC_FP2V		@ is there an FPINST2 to write?
-	VFPFMXR	FPINST2, r8, NE		@ FPINST2 if needed (and present)
+	beq	1f
+	VFPFMXR	FPINST, r6		@ restore FPINST (only if FPEXC.EX is set)
+	tst	r1, #FPEXC_FP2V		@ is there an FPINST2 to write?
+	beq	1f
+	VFPFMXR	FPINST2, r8		@ FPINST2 if needed (and present)
+1:
 	VFPFMXR	FPSCR, r5		@ restore status
 
 check_for_exception:
@@ -175,9 +181,12 @@
 	VFPFSTMIA r0, r2		@ save the working registers
 	VFPFMRX	r2, FPSCR		@ current status
 	tst	r1, #FPEXC_EX		@ is there additional state to save?
-	VFPFMRX	r3, FPINST, NE		@ FPINST (only if FPEXC.EX is set)
-	tstne	r1, #FPEXC_FP2V		@ is there an FPINST2 to read?
-	VFPFMRX	r12, FPINST2, NE	@ FPINST2 if needed (and present)
+	beq	1f
+	VFPFMRX	r3, FPINST		@ FPINST (only if FPEXC.EX is set)
+	tst	r1, #FPEXC_FP2V		@ is there an FPINST2 to read?
+	beq	1f
+	VFPFMRX	r12, FPINST2		@ FPINST2 if needed (and present)
+1:
 	stmia	r0, {r1, r2, r3, r12}	@ save FPEXC, FPSCR, FPINST, FPINST2
 	mov	pc, lr
 ENDPROC(vfp_save_state)
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index c0d2c9b..67ca340 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -371,6 +371,15 @@
 		 * in place; report VFP support to userspace.
 		 */
 		elf_hwcap |= HWCAP_VFP;
+#ifdef CONFIG_NEON
+		/*
+		 * Check for the presence of the Advanced SIMD
+		 * load/store instructions, integer and single
+		 * precision floating point operations.
+		 */
+		if ((fmrx(MVFR1) & 0x000fff00) == 0x00011100)
+			elf_hwcap |= HWCAP_NEON;
+#endif
 	}
 	return 0;
 }