[ARM] Feroceon: Feroceon-specific WA-cache compatible {copy,clear}_user_page()
This patch implements a set of Feroceon-specific
{copy,clear}_user_page() routines that perform more optimally than
the generic implementations. This also deals with write-allocate
caches (Feroceon can run L1 D in WA mode) which otherwise prevents
Linux from booting.
[nico: optimized the code even further]
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Tested-by: Sylver Bruneau <sylver.bruneau@googlemail.com>
Tested-by: Martin Michlmayr <tbm@cyrius.com>
Signed-off-by: Nicolas Pitre <nico@marvell.com>
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index a92a577..33ed048 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -372,7 +372,7 @@
select CPU_PABRT_NOIFAR
select CPU_CACHE_VIVT
select CPU_CP15_MMU
- select CPU_COPY_V4WB if MMU
+ select CPU_COPY_FEROCEON if MMU
select CPU_TLB_V4WBI if MMU
config CPU_FEROCEON_OLD_ID
@@ -523,6 +523,9 @@
config CPU_COPY_V4WB
bool
+config CPU_COPY_FEROCEON
+ bool
+
config CPU_COPY_V6
bool
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 44536a0..32b2d2d 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -36,6 +36,7 @@
obj-$(CONFIG_CPU_COPY_V3) += copypage-v3.o
obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o
obj-$(CONFIG_CPU_COPY_V4WB) += copypage-v4wb.o
+obj-$(CONFIG_CPU_COPY_FEROCEON) += copypage-feroceon.o
obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o context.o
obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o
obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o
diff --git a/arch/arm/mm/copypage-feroceon.S b/arch/arm/mm/copypage-feroceon.S
new file mode 100644
index 0000000..7eb0d32
--- /dev/null
+++ b/arch/arm/mm/copypage-feroceon.S
@@ -0,0 +1,95 @@
+/*
+ * linux/arch/arm/lib/copypage-feroceon.S
+ *
+ * Copyright (C) 2008 Marvell Semiconductors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This handles copy_user_page and clear_user_page on Feroceon
+ * more optimally than the generic implementations.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/asm-offsets.h>
+
+ .text
+ .align 5
+
+ENTRY(feroceon_copy_user_page)
+ stmfd sp!, {r4-r9, lr}
+ mov ip, #PAGE_SZ
+1: mov lr, r1
+ ldmia r1!, {r2 - r9}
+ pld [lr, #32]
+ pld [lr, #64]
+ pld [lr, #96]
+ pld [lr, #128]
+ pld [lr, #160]
+ pld [lr, #192]
+ pld [lr, #224]
+ stmia r0, {r2 - r9}
+ ldmia r1!, {r2 - r9}
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line
+ add r0, r0, #32
+ stmia r0, {r2 - r9}
+ ldmia r1!, {r2 - r9}
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line
+ add r0, r0, #32
+ stmia r0, {r2 - r9}
+ ldmia r1!, {r2 - r9}
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line
+ add r0, r0, #32
+ stmia r0, {r2 - r9}
+ ldmia r1!, {r2 - r9}
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line
+ add r0, r0, #32
+ stmia r0, {r2 - r9}
+ ldmia r1!, {r2 - r9}
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line
+ add r0, r0, #32
+ stmia r0, {r2 - r9}
+ ldmia r1!, {r2 - r9}
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line
+ add r0, r0, #32
+ stmia r0, {r2 - r9}
+ ldmia r1!, {r2 - r9}
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line
+ add r0, r0, #32
+ stmia r0, {r2 - r9}
+ subs ip, ip, #(32 * 8)
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line
+ add r0, r0, #32
+ bne 1b
+ mcr p15, 0, ip, c7, c10, 4 @ drain WB
+ ldmfd sp!, {r4-r9, pc}
+
+ .align 5
+
+ENTRY(feroceon_clear_user_page)
+ stmfd sp!, {r4-r7, lr}
+ mov r1, #PAGE_SZ/32
+ mov r2, #0
+ mov r3, #0
+ mov r4, #0
+ mov r5, #0
+ mov r6, #0
+ mov r7, #0
+ mov ip, #0
+ mov lr, #0
+1: stmia r0, {r2-r7, ip, lr}
+ subs r1, r1, #1
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line
+ add r0, r0, #32
+ bne 1b
+ mcr p15, 0, r1, c7, c10, 4 @ drain WB
+ ldmfd sp!, {r4-r7, pc}
+
+ __INITDATA
+
+ .type feroceon_user_fns, #object
+ENTRY(feroceon_user_fns)
+ .long feroceon_clear_user_page
+ .long feroceon_copy_user_page
+ .size feroceon_user_fns, . - feroceon_user_fns
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index f37abd7..a02c171 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -440,7 +440,7 @@
.long cpu_feroceon_name
.long feroceon_processor_functions
.long v4wbi_tlb_fns
- .long v4wb_user_fns
+ .long feroceon_user_fns
.long feroceon_cache_fns
.size __feroceon_old_id_proc_info, . - __feroceon_old_id_proc_info
#endif
@@ -466,6 +466,6 @@
.long cpu_feroceon_name
.long feroceon_processor_functions
.long v4wbi_tlb_fns
- .long v4wb_user_fns
+ .long feroceon_user_fns
.long feroceon_cache_fns
.size __feroceon_proc_info, . - __feroceon_proc_info