ARM: Add support for FA526 v2
Adds support for Faraday FA526 core. This core is used at least by:
Cortina Systems Gemini and Centroid family
Cavium Networks ECONA family
Grain Media GM8120
Pixelplus ImageARM
Prolific PL-1029
Faraday IP evaluation boards
v2:
- move TLB_BTB to separate patch
- update copyrights
Signed-off-by: Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 24e0f01..d29f926 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -72,6 +72,7 @@
tune-$(CONFIG_CPU_ARM922T) :=-mtune=arm9tdmi
tune-$(CONFIG_CPU_ARM925T) :=-mtune=arm9tdmi
tune-$(CONFIG_CPU_ARM926T) :=-mtune=arm9tdmi
+tune-$(CONFIG_CPU_FA526) :=-mtune=arm9tdmi
tune-$(CONFIG_CPU_SA110) :=-mtune=strongarm110
tune-$(CONFIG_CPU_SA1100) :=-mtune=strongarm1100
tune-$(CONFIG_CPU_XSCALE) :=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 77d61423..def0248 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -459,6 +459,20 @@
mcr p15, 0, r0, c7, c5, 4 @ ISB
mov pc, r12
+__fa526_cache_on: @ FA526 cache-on routine, reached from the FA526 proc_types entry
+ mov r12, lr @ keep the return address; the bl calls below overwrite lr
+ bl __setup_mmu @ helper outside this hunk; presumably builds the page tables (confirm)
+ mov r0, #0 @ zero operand for the cp15 maintenance writes below
+ mcr p15, 0, r0, c7, c7, 0 @ Invalidate whole cache
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mcr p15, 0, r0, c8, c7, 0 @ flush UTLB
+ mrc p15, 0, r0, c1, c0, 0 @ read control reg
+ orr r0, r0, #0x1000 @ I-cache enable
+ bl __common_mmu_cache_on @ helper outside this hunk; presumably writes r0 to the control reg (confirm)
+ mov r0, #0 @ zero operand again for the second flush
+ mcr p15, 0, r0, c8, c7, 0 @ flush UTLB
+ mov pc, r12 @ return through the address saved on entry
+
__arm6_mmu_cache_on:
mov r12, lr
bl __setup_mmu
@@ -636,6 +650,12 @@
b __armv4_mmu_cache_off
b __armv5tej_mmu_cache_flush
+ .word 0x66015261 @ FA526 CPU ID value (same pair as __fa526_proc_info in proc-fa526.S)
+ .word 0xff01fff1 @ presumably the mask applied to the CPU ID before comparing (confirm)
+ b __fa526_cache_on @ cache on
+ b __armv4_mmu_cache_off @ cache off, shared ARMv4 routine
+ b __fa526_cache_flush @ cache flush
+
@ These match on the architecture ID
.word 0x00020000 @ ARMv4T
@@ -775,6 +795,12 @@
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
+__fa526_cache_flush: @ FA526 flush routine, reached from the FA526 proc_types entry
+ mov r1, #0 @ zero operand for the three cp15 writes below
+ mcr p15, 0, r1, c7, c14, 0 @ clean and invalidate D cache
+ mcr p15, 0, r1, c7, c5, 0 @ flush I cache
+ mcr p15, 0, r1, c7, c10, 4 @ drain WB
+ mov pc, lr @ return; only r1 was modified
__armv6_mmu_cache_flush:
mov r1, #0
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 6cbd8fd..a6b8b90 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -46,6 +46,14 @@
# define MULTI_CACHE 1
#endif
+#if defined(CONFIG_CPU_FA526) /* Faraday FA526 cache model */
+# ifdef _CACHE /* an earlier CPU option already picked a cache model */
+# define MULTI_CACHE 1
+# else
+# define _CACHE fa /* sole model so far; presumably pasted into the fa_* cache function names */
+# endif
+#endif /* CONFIG_CPU_FA526 */
+
#if defined(CONFIG_CPU_ARM926T)
# ifdef _CACHE
# define MULTI_CACHE 1
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index f341c9d..e6eb8a6 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -76,6 +76,14 @@
# endif
#endif
+#ifdef CONFIG_CPU_COPY_FA /* Faraday copy/clear user page model */
+# ifdef _USER /* an earlier option already picked a user-page model */
+# define MULTI_USER 1
+# else
+# define _USER fa /* sole model so far; presumably selects the fa_* user page functions */
+# endif
+#endif /* CONFIG_CPU_COPY_FA */
+
#ifdef CONFIG_CPU_SA1100
# ifdef _USER
# define MULTI_USER 1
diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index db80203..0094928 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -89,6 +89,14 @@
# define CPU_NAME cpu_arm922
# endif
# endif
+# ifdef CONFIG_CPU_FA526 /* Faraday FA526 processor functions */
+# ifdef CPU_NAME /* another CPU type is already configured */
+# undef MULTI_CPU /* undef first so the define below never redefines */
+# define MULTI_CPU
+# else
+# define CPU_NAME cpu_fa526 /* only CPU so far; matches the cpu_fa526_* symbols in proc-fa526.S */
+# endif
+# endif /* CONFIG_CPU_FA526 */
# ifdef CONFIG_CPU_ARM925T
# ifdef CPU_NAME
# undef MULTI_CPU
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 811be55..d6a4dad 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -125,6 +125,12 @@
: : "r" (0) : "memory")
#define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
: : "r" (0) : "memory")
+#elif defined(CONFIG_CPU_FA526) /* FA526: barriers are cp15 c7 writes of zero */
+#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
+ : : "r" (0) : "memory") /* isb: c7, c5, 4, the op cache-fa.S labels "prefetch flush" */
+#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
+ : : "r" (0) : "memory") /* dsb: c7, c10, 4, the write buffer drain */
+#define dmb() __asm__ __volatile__ ("" : : : "memory") /* dmb: compiler barrier only, no instruction */
#else
#define isb() __asm__ __volatile__ ("" : : : "memory")
#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index ffedd24..a622180 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -54,6 +54,7 @@
* v4wb - ARMv4 with write buffer without I TLB flush entry instruction
* v4wbi - ARMv4 with write buffer with I TLB flush entry instruction
* fr - Feroceon (v4wbi with non-outer-cacheable page table walks)
+ * fa - Faraday (v4 with write buffer with UTLB and branch target buffer (BTB))
* v6wbi - ARMv6 with write buffer with I TLB flush entry instruction
* v7wbi - identical to v6wbi
*/
@@ -90,6 +91,22 @@
# define v4_always_flags (-1UL)
#endif
+#define fa_tlb_flags (TLB_WB | TLB_BTB | TLB_DCLEAN | \
+ TLB_V4_U_FULL | TLB_V4_U_PAGE) /* Faraday TLB operation set; also stored in fa_tlb_fns (tlb-fa.S) */
+
+#ifdef CONFIG_CPU_TLB_FA /* Faraday TLB model configured */
+# define fa_possible_flags fa_tlb_flags
+# define fa_always_flags fa_tlb_flags
+# ifdef _TLB /* another TLB model was already selected */
+# define MULTI_TLB 1
+# else
+# define _TLB fa /* sole model so far */
+# endif
+#else /* not configured: contribute neutral values */
+# define fa_possible_flags 0 /* neutral in the OR of the *_possible_flags */
+# define fa_always_flags (-1UL) /* neutral in the AND of the *_always_flags */
+#endif /* CONFIG_CPU_TLB_FA */
+
#define v4wbi_tlb_flags (TLB_WB | TLB_DCLEAN | \
TLB_V4_I_FULL | TLB_V4_D_FULL | \
TLB_V4_I_PAGE | TLB_V4_D_PAGE)
@@ -268,6 +285,7 @@
v4wbi_possible_flags | \
fr_possible_flags | \
v4wb_possible_flags | \
+ fa_possible_flags | \
v6wbi_possible_flags | \
v7wbi_possible_flags)
@@ -276,6 +294,7 @@
v4wbi_always_flags & \
fr_always_flags & \
v4wb_always_flags & \
+ fa_always_flags & \
v6wbi_always_flags & \
v7wbi_always_flags)
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index d490f37..bc333186 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -186,6 +186,24 @@
Say Y if you want support for the ARM926T processor.
Otherwise, say N.
+# FA526
+config CPU_FA526
+ bool
+ select CPU_32v4
+ select CPU_ABRT_EV4
+ select CPU_PABRT_NOIFAR
+ select CPU_CACHE_VIVT
+ select CPU_CP15_MMU
+ select CPU_CACHE_FA
+ select CPU_COPY_FA if MMU
+ select CPU_TLB_FA if MMU
+ help
+ The FA526 is a version of the ARMv4 compatible processor with
+ Branch Target Buffer, Unified TLB and cache line size 16.
+
+ Say Y if you want support for the FA526 processor.
+ Otherwise, say N.
+
# ARM940T
config CPU_ARM940T
bool "Support ARM940T processor" if ARCH_INTEGRATOR
@@ -484,6 +502,9 @@
config CPU_CACHE_VIPT
bool
+config CPU_CACHE_FA
+ bool
+
if MMU
# The copy-page model
config CPU_COPY_V3
@@ -498,6 +519,9 @@
config CPU_COPY_FEROCEON
bool
+config CPU_COPY_FA
+ bool
+
config CPU_COPY_V6
bool
@@ -528,6 +552,13 @@
help
Feroceon TLB (v4wbi with non-outer-cachable page table walks).
+config CPU_TLB_FA
+ bool
+ help
+ Faraday ARM FA526 architecture, unified TLB with writeback cache
+ and invalidate instruction cache entry. Branch target buffer is
+ also supported.
+
config CPU_TLB_V6
bool
@@ -638,7 +669,7 @@
config CPU_DCACHE_WRITETHROUGH
bool "Force write through D-cache"
- depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020) && !CPU_DCACHE_DISABLE
+ depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_FA526) && !CPU_DCACHE_DISABLE
default y if CPU_ARM925T
help
Say Y here to use the data cache in writethrough mode. Unless you
@@ -653,7 +684,7 @@
config CPU_BPREDICT_DISABLE
bool "Disable branch prediction"
- depends on CPU_ARM1020 || CPU_V6 || CPU_XSC3 || CPU_V7
+ depends on CPU_ARM1020 || CPU_V6 || CPU_XSC3 || CPU_V7 || CPU_FA526
help
Say Y here to disable branch prediction. If unsure, say N.
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 480f78a..40f941c 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -32,6 +32,7 @@
obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o
obj-$(CONFIG_CPU_CACHE_V6) += cache-v6.o
obj-$(CONFIG_CPU_CACHE_V7) += cache-v7.o
+obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o
obj-$(CONFIG_CPU_COPY_V3) += copypage-v3.o
obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o
@@ -41,6 +42,7 @@
obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o
obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o
obj-$(CONFIG_CPU_XSC3) += copypage-xsc3.o
+obj-$(CONFIG_CPU_COPY_FA) += copypage-fa.o
obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o
obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o
@@ -49,6 +51,7 @@
obj-$(CONFIG_CPU_TLB_FEROCEON) += tlb-v4wbi.o # reuse v4wbi TLB functions
obj-$(CONFIG_CPU_TLB_V6) += tlb-v6.o
obj-$(CONFIG_CPU_TLB_V7) += tlb-v7.o
+obj-$(CONFIG_CPU_TLB_FA) += tlb-fa.o
obj-$(CONFIG_CPU_ARM610) += proc-arm6_7.o
obj-$(CONFIG_CPU_ARM710) += proc-arm6_7.o
@@ -62,6 +65,7 @@
obj-$(CONFIG_CPU_ARM926T) += proc-arm926.o
obj-$(CONFIG_CPU_ARM940T) += proc-arm940.o
obj-$(CONFIG_CPU_ARM946E) += proc-arm946.o
+obj-$(CONFIG_CPU_FA526) += proc-fa526.o
obj-$(CONFIG_CPU_ARM1020) += proc-arm1020.o
obj-$(CONFIG_CPU_ARM1020E) += proc-arm1020e.o
obj-$(CONFIG_CPU_ARM1022) += proc-arm1022.o
diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S
new file mode 100644
index 0000000..b63a8f7
--- /dev/null
+++ b/arch/arm/mm/cache-fa.S
@@ -0,0 +1,220 @@
+/*
+ * linux/arch/arm/mm/cache-fa.S
+ *
+ * Copyright (C) 2005 Faraday Corp.
+ * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on cache-v4wb.S:
+ * Copyright (C) 1997-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Processors: FA520 FA526 FA626
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+
+#include "proc-macros.S"
+
+/*
+ * The size of one data cache line, in bytes (address stride of the range loops).
+ */
+#define CACHE_DLINESIZE 16
+
+/*
+ * The total size of the data cache, in bytes (smaller value on Gemini builds).
+ */
+#ifdef CONFIG_ARCH_GEMINI
+#define CACHE_DSIZE 8192
+#else
+#define CACHE_DSIZE 16384
+#endif
+
+/* Range size (bytes) from which flush_user_cache_range flushes the whole cache. FIXME: value is only an estimate */
+#define CACHE_DLIMIT (CACHE_DSIZE * 2)
+
+/*
+ * flush_user_cache_all()
+ *
+ * Clean and invalidate all cache entries in a particular address
+ * space. Shares its code with flush_kern_cache_all() below.
+ */
+ENTRY(fa_flush_user_cache_all)
+ /* FALLTHROUGH */
+/*
+ * flush_kern_cache_all()
+ *
+ * Clean and invalidate the entire cache (D cache, I cache and BTB).
+ */
+ENTRY(fa_flush_kern_cache_all)
+ mov ip, #0 @ zero operand for the cp15 writes
+ mov r2, #VM_EXEC @ force the executable path so the I side is flushed too
+__flush_whole_cache: @ also entered from fa_flush_user_cache_range with ip = 0 and r2 = flags
+ mcr p15, 0, ip, c7, c14, 0 @ clean/invalidate D cache
+ tst r2, #VM_EXEC @ the conditional ops below run only when the exec flag is set
+ mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
+ mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB
+ mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer (NOTE(review): skipped for non-exec ranges - confirm intended)
+ mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
+ mov pc, lr @ return
+
+/*
+ * flush_user_cache_range(start, end, flags)
+ *
+ * Clean and invalidate a range of cache entries in the specified
+ * address space; ranges of CACHE_DLIMIT bytes or more flush everything.
+ *
+ * - start - start address (inclusive, page aligned)
+ * - end - end address (exclusive, page aligned)
+ * - flags - vm_area_struct flags describing address space
+ */
+ENTRY(fa_flush_user_cache_range)
+ mov ip, #0 @ zero operand for the cp15 writes after the loop
+ sub r3, r1, r0 @ calculate total size
+ cmp r3, #CACHE_DLIMIT @ total size >= limit?
+ bhs __flush_whole_cache @ flush whole D cache
+
+1: tst r2, #VM_EXEC @ re-tested every pass because the cmp below overwrites the flags
+ mcrne p15, 0, r0, c7, c5, 1 @ invalidate I line
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry
+ add r0, r0, #CACHE_DLINESIZE @ step to the next D line
+ cmp r0, r1
+ blo 1b @ loop while r0 is below end (unsigned)
+ tst r2, #VM_EXEC @ the three barrier ops below run only for executable ranges
+ mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB
+ mcrne p15, 0, ip, c7, c10, 4 @ data write barrier
+ mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
+ mov pc, lr @ return
+
+/*
+ * coherent_kern_range(start, end)
+ *
+ * Ensure coherency between the Icache and the Dcache in the
+ * region described by start, end. If you have non-snooping
+ * Harvard caches, you need to implement this function.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ */
+ENTRY(fa_coherent_kern_range)
+ /* fall through: the kernel variant is the same code as the user variant */
+
+/*
+ * coherent_user_range(start, end)
+ *
+ * Ensure coherency between the Icache and the Dcache in the
+ * region described by start, end. If you have non-snooping
+ * Harvard caches, you need to implement this function.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ */
+ENTRY(fa_coherent_user_range)
+ bic r0, r0, #CACHE_DLINESIZE - 1 @ round start down to a line boundary
+1: mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry
+ mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
+ add r0, r0, #CACHE_DLINESIZE @ step to the next line
+ cmp r0, r1
+ blo 1b @ loop while r0 is below end (unsigned)
+ mov r0, #0 @ zero operand for the barrier ops; r0 is 0 on return
+ mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
+ mov pc, lr @ return
+
+/*
+ * flush_kern_dcache_page(kaddr)
+ *
+ * Ensure that the data held in the page kaddr is written back
+ * to the page in question. The whole I cache is invalidated too.
+ *
+ * - kaddr - kernel address (guaranteed to be page aligned)
+ */
+ENTRY(fa_flush_kern_dcache_page)
+ add r1, r0, #PAGE_SZ @ r1 = first address past the page
+1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line
+ add r0, r0, #CACHE_DLINESIZE @ step to the next line
+ cmp r0, r1
+ blo 1b @ loop until the end of the page
+ mov r0, #0 @ zero operand for the two ops below
+ mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr @ return
+
+/*
+ * dma_inv_range(start, end)
+ *
+ * Invalidate (discard) the specified virtual address range.
+ * May not write back any entries. If 'start' or 'end'
+ * are not cache line aligned, those lines must be written
+ * back.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ */
+ENTRY(fa_dma_inv_range)
+ tst r0, #CACHE_DLINESIZE - 1 @ does start fall inside a line?
+ bic r0, r0, #CACHE_DLINESIZE - 1 @ round start down; bic keeps the flags set by tst
+ mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry
+ tst r1, #CACHE_DLINESIZE - 1 @ does end fall inside a line?
+ bic r1, r1, #CACHE_DLINESIZE - 1 @ round end down; the loop stops before this line
+ mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D entry
+1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
+ add r0, r0, #CACHE_DLINESIZE @ step to the next line
+ cmp r0, r1
+ blo 1b @ loop while r0 is below the rounded end (unsigned)
+ mov r0, #0 @ zero operand for the drain
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr @ return
+
+/*
+ * dma_clean_range(start, end)
+ *
+ * Clean (write back) the specified virtual address range.
+ *
+ * - start - virtual start address (rounded down to a cache line here)
+ * - end - virtual end address
+ */
+ENTRY(fa_dma_clean_range)
+ bic r0, r0, #CACHE_DLINESIZE - 1 @ round start down to a line boundary
+1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
+ add r0, r0, #CACHE_DLINESIZE @ step to the next line
+ cmp r0, r1
+ blo 1b @ loop while r0 is below end (unsigned)
+ mov r0, #0 @ zero operand for the drain
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr @ return
+
+/*
+ * dma_flush_range(start,end) - clean and invalidate the D lines of the range
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+ENTRY(fa_dma_flush_range)
+ bic r0, r0, #CACHE_DLINESIZE - 1 @ round start down to a line boundary
+1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry
+ add r0, r0, #CACHE_DLINESIZE @ step to the next line
+ cmp r0, r1
+ blo 1b @ loop while r0 is below end (unsigned)
+ mov r0, #0 @ zero operand for the drain
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr @ return
+
+ __INITDATA @ the table below is placed in init data
+
+ .type fa_cache_fns, #object
+ENTRY(fa_cache_fns) @ table of cache routines; entry order presumably must match the C struct (confirm)
+ .long fa_flush_kern_cache_all
+ .long fa_flush_user_cache_all @ same code as the kern variant (falls through)
+ .long fa_flush_user_cache_range
+ .long fa_coherent_kern_range @ same code as the user variant (falls through)
+ .long fa_coherent_user_range
+ .long fa_flush_kern_dcache_page
+ .long fa_dma_inv_range
+ .long fa_dma_clean_range
+ .long fa_dma_flush_range
+ .size fa_cache_fns, . - fa_cache_fns
diff --git a/arch/arm/mm/copypage-fa.c b/arch/arm/mm/copypage-fa.c
new file mode 100644
index 0000000..b2a6008
--- /dev/null
+++ b/arch/arm/mm/copypage-fa.c
@@ -0,0 +1,86 @@
+/*
+ * linux/arch/arm/mm/copypage-fa.c
+ *
+ * Copyright (C) 2005 Faraday Corp.
+ * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on copypage-v4wb.S:
+ * Copyright (C) 1995-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+/*
+ * Faraday optimised copy_user_page: copy PAGE_SIZE bytes from kfrom to kto, 32 per loop
+ * pass, cleaning and invalidating each written D line, then drain the write buffer.
+ * Written as ordinary extended asm rather than a __naked function, because GCC only
+ * supports basic asm inside naked functions; all operands and clobbers are declared.
+ */
+static void fa_copy_user_page(void *kto, const void *kfrom)
+{
+ int tmp; /* pass counter, preloaded with PAGE_SIZE / 32 through the "2" constraint */
+ asm volatile ("\
+1: ldmia %1!, {r3, r4, ip, lr} @ 4\n\
+ stmia %0, {r3, r4, ip, lr} @ 4\n\
+ mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\
+ add %0, %0, #16 @ 1\n\
+ ldmia %1!, {r3, r4, ip, lr} @ 4\n\
+ stmia %0, {r3, r4, ip, lr} @ 4\n\
+ mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\
+ add %0, %0, #16 @ 1\n\
+ subs %2, %2, #1 @ 1\n\
+ bne 1b @ 1\n\
+ mcr p15, 0, %2, c7, c10, 4 @ 1 drain WB"
+ : "+&r" (kto), "+&r" (kfrom), "=&r" (tmp) /* both pointers advance; tmp counts down to 0 */
+ : "2" (PAGE_SIZE / 32)
+ : "r3", "r4", "ip", "lr", "cc", "memory"); /* copy registers, flags (subs), page contents */
+}
+
+void fa_copy_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr)
+{
+ /* Map destination and source temporarily, copy, then unmap in reverse order. */
+ void *dst = kmap_atomic(to, KM_USER0);
+ void *src = kmap_atomic(from, KM_USER1);
+
+ fa_copy_user_page(dst, src);
+ kunmap_atomic(src, KM_USER1);
+ kunmap_atomic(dst, KM_USER0);
+}
+
+/*
+ * Faraday optimised clear_user_page: zero the page through a temporary
+ * kmap_atomic() mapping, 32 bytes per loop pass. Each written D line is
+ * cleaned and invalidated, and the write buffer is drained at the end.
+ */
+void fa_clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+ void *ptr, *kaddr = kmap_atomic(page, KM_USER0); /* ptr only receives the advanced cursor */
+ asm volatile("\
+ mov r1, %2 @ 1\n\
+ mov r2, #0 @ 1\n\
+ mov r3, #0 @ 1\n\
+ mov ip, #0 @ 1\n\
+ mov lr, #0 @ 1\n\
+1: stmia %0, {r2, r3, ip, lr} @ 4\n\
+ mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\
+ add %0, %0, #16 @ 1\n\
+ stmia %0, {r2, r3, ip, lr} @ 4\n\
+ mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\
+ add %0, %0, #16 @ 1\n\
+ subs r1, r1, #1 @ 1\n\
+ bne 1b @ 1\n\
+ mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB"
+ : "=r" (ptr) /* %0 starts as kaddr and is advanced; kaddr itself stays intact for the unmap */
+ : "0" (kaddr), "I" (PAGE_SIZE / 32) /* %2 = number of 32-byte passes */
+ : "r1", "r2", "r3", "ip", "lr", "cc", "memory"); /* subs changes the flags; stmia writes the page */
+ kunmap_atomic(kaddr, KM_USER0);
+}
+
+struct cpu_user_fns fa_user_fns __initdata = { /* Faraday user page operations */
+ .cpu_copy_user_highpage = fa_copy_user_highpage,
+ .cpu_clear_user_highpage = fa_clear_user_highpage,
+};
diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
new file mode 100644
index 0000000..08b8a95
--- /dev/null
+++ b/arch/arm/mm/proc-fa526.S
@@ -0,0 +1,248 @@
+/*
+ * linux/arch/arm/mm/proc-fa526.S: MMU functions for FA526
+ *
+ * Written by : Luke Lee
+ * Copyright (C) 2005 Faraday Corp.
+ * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ *
+ * These are the low level assembler for performing cache and TLB
+ * functions on the fa526.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+
+#include "proc-macros.S"
+
+#define CACHE_DLINESIZE 16 /* D cache line size in bytes; same value as in cache-fa.S */
+
+ .text
+/*
+ * cpu_fa526_proc_init() - no work is done here; returns immediately
+ */
+ENTRY(cpu_fa526_proc_init)
+ mov pc, lr @ return
+
+/*
+ * cpu_fa526_proc_fin() - mask interrupts, flush all caches, then disable them
+ */
+ENTRY(cpu_fa526_proc_fin)
+ stmfd sp!, {lr} @ save lr; the bl below overwrites it
+ mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
+ msr cpsr_c, ip @ SVC mode with the F and I bits set
+ bl fa_flush_kern_cache_all @ whole-cache clean and invalidate (cache-fa.S)
+ mrc p15, 0, r0, c1, c0, 0 @ ctrl register
+ bic r0, r0, #0x1000 @ ...i............
+ bic r0, r0, #0x000e @ ............wca.
+ mcr p15, 0, r0, c1, c0, 0 @ disable caches
+ nop @ two nops after the control register write
+ nop
+ ldmfd sp!, {pc} @ return by popping the saved lr into pc
+
+/*
+ * cpu_fa526_reset(loc)
+ *
+ * Perform a soft reset of the system. Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector. Does not return.
+ *
+ * loc: location to jump to for soft reset (passed in r0)
+ */
+ .align 4
+ENTRY(cpu_fa526_reset)
+/* TODO: Use CP8 if possible... */
+ mov ip, #0 @ zero operand for the maintenance ops; r0 must stay intact
+ mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
+ mcr p15, 0, ip, c7, c10, 4 @ drain WB
+#ifdef CONFIG_MMU
+ mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs (same op is labelled UTLB invalidate in switch_mm)
+#endif
+ mrc p15, 0, ip, c1, c0, 0 @ ctrl register
+ bic ip, ip, #0x000f @ ............wcam
+ bic ip, ip, #0x1100 @ ...i...s........
+ bic ip, ip, #0x0800 @ BTB off
+ mcr p15, 0, ip, c1, c0, 0 @ ctrl register
+ nop @ two nops after the control register write
+ nop
+ mov pc, r0 @ jump to loc
+
+/*
+ * cpu_fa526_do_idle() - issue the wait-for-interrupt operation and return
+ */
+ .align 4
+ENTRY(cpu_fa526_do_idle)
+ mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt (NOTE(review): r0 is not zeroed first - confirm the value is ignored)
+ mov pc, lr @ return
+
+
+ENTRY(cpu_fa526_dcache_clean_area) @ clean D lines: r0 = start address, r1 = size in bytes
+1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
+ add r0, r0, #CACHE_DLINESIZE @ step to the next line
+ subs r1, r1, #CACHE_DLINESIZE @ one line fewer remaining
+ bhi 1b @ loop while the remaining size is above zero (unsigned)
+ mcr p15, 0, r0, c7, c10, 4 @ drain WB (NOTE(review): r0 is an address here, not zero as at other drain sites - confirm)
+ mov pc, lr @ return
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_fa526_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd, after
+ * flushing the caches and BTB; the UTLB is invalidated afterwards.
+ * pgd: new page tables (passed in r0)
+ */
+ .align 4
+ENTRY(cpu_fa526_switch_mm)
+#ifdef CONFIG_MMU
+ mov ip, #0 @ zero operand for the maintenance ops; r0 holds pgd
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+ mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache
+#else
+ mcr p15, 0, ip, c7, c14, 0 @ clean and invalidate whole D cache
+#endif
+ mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
+ mcr p15, 0, ip, c7, c5, 6 @ invalidate BTB since mm changed
+ mcr p15, 0, ip, c7, c10, 4 @ data write barrier
+ mcr p15, 0, ip, c7, c5, 4 @ prefetch flush
+ mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
+ mcr p15, 0, ip, c8, c7, 0 @ invalidate UTLB
+#endif
+ mov pc, lr @ return (the only instruction when the MMU is not configured)
+
+/*
+ * cpu_fa526_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out (clean its D line, then drain the write buffer)
+ */
+ .align 4
+ENTRY(cpu_fa526_set_pte_ext)
+#ifdef CONFIG_MMU
+ armv3_set_pte_ext @ macro from proc-macros.S; presumably stores the entry and leaves its address in r0 (confirm)
+ mov r0, r0 @ no-op
+ mcr p15, 0, r0, c7, c10, 1 @ clean D entry
+ mov r0, #0 @ zero operand for the drain
+ mcr p15, 0, r0, c7, c10, 4 @ drain WB
+#endif
+ mov pc, lr @ return
+
+ __INIT
+
+ .type __fa526_setup, #function
+__fa526_setup: @ referenced by the branch in __fa526_proc_info
+ /* On return of this routine, r0 must carry correct flags for CFG register */
+ mov r0, #0 @ zero operand for the maintenance ops
+ mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4
+#ifdef CONFIG_MMU
+ mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4
+#endif
+ mcr p15, 0, r0, c7, c5, 5 @ invalidate IScratchpad RAM
+
+ mov r0, #1
+ mcr p15, 0, r0, c1, c1, 0 @ turn-on ECR
+
+ mov r0, #0 @ zero operand again for the ops below
+ mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB All
+ mcr p15, 0, r0, c7, c10, 4 @ data write barrier
+ mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
+
+ mov r0, #0x1f @ Domains 0, 1 = manager, 2 = client
+ mcr p15, 0, r0, c3, c0 @ load domain access register
+
+ mrc p15, 0, r0, c1, c0 @ get control register v4
+ ldr r5, fa526_cr1_clear @ r5 is scratch: mask of bits to clear
+ bic r0, r0, r5
+ ldr r5, fa526_cr1_set @ r5 is scratch: mask of bits to set
+ orr r0, r0, r5
+ mov pc, lr @ return with the new control value in r0; it is not written here
+ .size __fa526_setup, . - __fa526_setup
+
+ /*
+ * .RVI ZFRS BLDP WCAM
+ * ..11 1001 .111 1101
+ * Bit pattern of fa526_cr1_set (0x397D); the letters name the control register bits.
+ */
+ .type fa526_cr1_clear, #object
+ .type fa526_cr1_set, #object
+fa526_cr1_clear:
+ .word 0x3f3f @ bits cleared first by the bic in __fa526_setup
+fa526_cr1_set:
+ .word 0x397D @ bits then set by the orr in __fa526_setup
+
+ __INITDATA
+
+/*
+ * Purpose : Function pointers used to access above functions - all calls
+ * come through these. Entry order presumably must match the C struct (confirm).
+ */
+ .type fa526_processor_functions, #object
+fa526_processor_functions:
+ .word v4_early_abort @ defined outside this file; presumably the data abort handler
+ .word pabort_noifar @ defined outside this file; presumably the prefetch abort handler
+ .word cpu_fa526_proc_init
+ .word cpu_fa526_proc_fin
+ .word cpu_fa526_reset
+ .word cpu_fa526_do_idle
+ .word cpu_fa526_dcache_clean_area
+ .word cpu_fa526_switch_mm
+ .word cpu_fa526_set_pte_ext
+ .size fa526_processor_functions, . - fa526_processor_functions
+
+ .section ".rodata" @ name strings referenced from __fa526_proc_info
+
+ .type cpu_arch_name, #object
+cpu_arch_name:
+ .asciz "armv4"
+ .size cpu_arch_name, . - cpu_arch_name
+
+ .type cpu_elf_name, #object
+cpu_elf_name:
+ .asciz "v4"
+ .size cpu_elf_name, . - cpu_elf_name
+
+ .type cpu_fa526_name, #object
+cpu_fa526_name:
+ .asciz "FA526"
+ .size cpu_fa526_name, . - cpu_fa526_name
+
+ .align @ realign after the variable-length strings
+
+ .section ".proc.info.init", #alloc, #execinstr
+
+ .type __fa526_proc_info,#object
+__fa526_proc_info: @ FA526 processor record; field order presumably must match the C struct (confirm)
+ .long 0x66015261 @ CPU ID value, same pair as the decompressor table in head.S
+ .long 0xff01fff1 @ presumably the mask applied to the CPU ID before comparing (confirm)
+ .long PMD_TYPE_SECT | \
+ PMD_SECT_BUFFERABLE | \
+ PMD_SECT_CACHEABLE | \
+ PMD_BIT4 | \
+ PMD_SECT_AP_WRITE | \
+ PMD_SECT_AP_READ @ section flags with cacheable and bufferable set
+ .long PMD_TYPE_SECT | \
+ PMD_BIT4 | \
+ PMD_SECT_AP_WRITE | \
+ PMD_SECT_AP_READ @ section flags without cacheable or bufferable
+ b __fa526_setup @ branch to the setup routine defined above
+ .long cpu_arch_name @ "armv4"
+ .long cpu_elf_name @ "v4"
+ .long HWCAP_SWP | HWCAP_HALF @ hardware capability bits
+ .long cpu_fa526_name @ "FA526"
+ .long fa526_processor_functions @ table defined above
+ .long fa_tlb_fns @ from tlb-fa.S
+ .long fa_user_fns @ from copypage-fa.c
+ .long fa_cache_fns @ from cache-fa.S
+ .size __fa526_proc_info, . - __fa526_proc_info
diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S
new file mode 100644
index 0000000..9694f1f
--- /dev/null
+++ b/arch/arm/mm/tlb-fa.S
@@ -0,0 +1,75 @@
+/*
+ * linux/arch/arm/mm/tlb-fa.S
+ *
+ * Copyright (C) 2005 Faraday Corp.
+ * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on tlb-v4wbi.S:
+ * Copyright (C) 1997-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * ARM architecture version 4, Faraday variation.
+ * This assumes a unified TLB with a write buffer and a branch target buffer (BTB)
+ *
+ * Processors: FA520 FA526 FA626
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/asm-offsets.h>
+#include <asm/tlbflush.h>
+#include "proc-macros.S"
+
+
+/*
+ * flush_user_tlb_range(start, end, mm)
+ *
+ * Invalidate a range of TLB entries in the specified address space.
+ * Nothing is done unless that address space is current->active_mm.
+ * - start - range start address
+ * - end - range end address
+ * - mm - fed to vma_vm_mm below, so presumably a vm_area_struct, not an mm_struct (TODO confirm)
+ */
+ .align 4
+ENTRY(fa_flush_user_tlb_range)
+ vma_vm_mm ip, r2 @ macro from proc-macros.S; presumably ip = mm of the vma in r2 (confirm)
+ act_mm r3 @ get current->active_mm
+ eors r3, ip, r3 @ == mm ?
+ movne pc, lr @ no, we dont do anything
+ mov r3, #0 @ zero operand for the barrier and BTB ops
+ mcr p15, 0, r3, c7, c10, 4 @ drain WB
+ bic r0, r0, #0x0ff @ clear the low 12 bits of start, in two steps
+ bic r0, r0, #0xf00
+1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry
+ add r0, r0, #PAGE_SZ @ step to the next page
+ cmp r0, r1
+ blo 1b @ loop while r0 is below end (unsigned)
+ mcr p15, 0, r3, c7, c5, 6 @ invalidate BTB
+ mcr p15, 0, r3, c7, c10, 4 @ data write barrier
+ mov pc, lr @ return (no prefetch flush here, unlike the kernel variant)
+
+
+ENTRY(fa_flush_kern_tlb_range) @ invalidate UTLB entries: r0 = start, r1 = end
+ mov r3, #0 @ zero operand for the barrier and BTB ops
+ mcr p15, 0, r3, c7, c10, 4 @ drain WB
+ bic r0, r0, #0x0ff @ clear the low 12 bits of start, in two steps
+ bic r0, r0, #0xf00
+1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry
+ add r0, r0, #PAGE_SZ @ step to the next page
+ cmp r0, r1
+ blo 1b @ loop while r0 is below end (unsigned)
+ mcr p15, 0, r3, c7, c5, 6 @ invalidate BTB
+ mcr p15, 0, r3, c7, c10, 4 @ data write barrier
+ mcr p15, 0, r3, c7, c5, 4 @ prefetch flush
+ mov pc, lr @ return
+
+ __INITDATA @ the table below is placed in init data
+
+ .type fa_tlb_fns, #object
+ENTRY(fa_tlb_fns) @ table of TLB routines; entry order presumably must match the C struct (confirm)
+ .long fa_flush_user_tlb_range
+ .long fa_flush_kern_tlb_range
+ .long fa_tlb_flags @ flag word from asm/tlbflush.h
+ .size fa_tlb_fns, . - fa_tlb_fns