Merge branch 'master' into sh/smp

Conflicts:
	arch/sh/mm/cache-sh4.c
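
One recurring cleanup pulled in by this merge is the replacement of the
open-coded NEFF sign extension in the SH-5 signal and process paths with
the neff_sign_extend() helper added to <asm/pgtable.h>. As a reviewer aid
(not part of the patched files), here is a minimal, self-contained sketch
of the intended behaviour; NEFF = 32 and the host-side main() are purely
illustrative assumptions:

	#include <stdio.h>

	/* Illustrative value only; the kernel derives NEFF from the CPU configuration. */
	#define NEFF		32
	#define NEFF_SIGN	(1LL << (NEFF - 1))
	#define NEFF_MASK	(-1LL << NEFF)

	static inline unsigned long long neff_sign_extend(unsigned long val)
	{
		unsigned long long extended = val;
		return (extended & NEFF_SIGN) ? (extended | NEFF_MASK) : extended;
	}

	int main(void)
	{
		/* Bit NEFF-1 set: the upper bits get filled in, matching the old
		 * open-coded "(se & NEFF_SIGN) ? (se | NEFF_MASK) : se" form. */
		printf("%llx\n", neff_sign_extend(0x80001000UL)); /* ffffffff80001000 */
		printf("%llx\n", neff_sign_extend(0x00001000UL)); /* 1000 */
		return 0;
	}
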
diff --git a/arch/sh/include/asm/bugs.h b/arch/sh/include/asm/bugs.h
index 4924ff6..46260fc 100644
--- a/arch/sh/include/asm/bugs.h
+++ b/arch/sh/include/asm/bugs.h
@@ -21,25 +21,25 @@
 
 	current_cpu_data.loops_per_jiffy = loops_per_jiffy;
 
-	switch (current_cpu_data.type) {
-	case CPU_SH7619:
+	switch (current_cpu_data.family) {
+	case CPU_FAMILY_SH2:
 		*p++ = '2';
 		break;
-	case CPU_SH7201 ... CPU_MXG:
+	case CPU_FAMILY_SH2A:
 		*p++ = '2';
 		*p++ = 'a';
 		break;
-	case CPU_SH7705 ... CPU_SH7729:
+	case CPU_FAMILY_SH3:
 		*p++ = '3';
 		break;
-	case CPU_SH7750 ... CPU_SH4_501:
+	case CPU_FAMILY_SH4:
 		*p++ = '4';
 		break;
-	case CPU_SH7763 ... CPU_SHX3:
+	case CPU_FAMILY_SH4A:
 		*p++ = '4';
 		*p++ = 'a';
 		break;
-	case CPU_SH7343 ... CPU_SH7366:
+	case CPU_FAMILY_SH4AL_DSP:
 		*p++ = '4';
 		*p++ = 'a';
 		*p++ = 'l';
@@ -48,15 +48,15 @@
 		*p++ = 's';
 		*p++ = 'p';
 		break;
-	case CPU_SH5_101 ... CPU_SH5_103:
+	case CPU_FAMILY_SH5:
 		*p++ = '6';
 		*p++ = '4';
 		break;
-	case CPU_SH_NONE:
+	case CPU_FAMILY_UNKNOWN:
 		/*
-		 * Specifically use CPU_SH_NONE rather than default:,
-		 * so we're able to have the compiler whine about
-		 * unhandled enumerations.
+		 * Specifically use CPU_FAMILY_UNKNOWN rather than
+		 * default:, so we're able to have the compiler whine
+		 * about unhandled enumerations.
 		 */
 		break;
 	}
diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h
index 4c5462d..11e4166 100644
--- a/arch/sh/include/asm/cacheflush.h
+++ b/arch/sh/include/asm/cacheflush.h
@@ -3,45 +3,65 @@
 
 #ifdef __KERNEL__
 
-#ifdef CONFIG_CACHE_OFF
+#include <linux/mm.h>
+
 /*
- * Nothing to do when the cache is disabled, initial flush and explicit
- * disabling is handled at CPU init time.
+ * Cache flushing:
  *
- * See arch/sh/kernel/cpu/init.c:cache_init().
+ *  - flush_cache_all() flushes entire cache
+ *  - flush_cache_mm(mm) flushes the specified mm context's cache lines
+ *  - flush_cache_dup_mm(mm) handles cache flushing when forking
+ *  - flush_cache_page(vma, vmaddr, pfn) flushes a single page
+ *  - flush_cache_range(vma, start, end) flushes a range of pages
+ *
+ *  - flush_dcache_page(pg) flushes (wback & invalidates) a page for dcache
+ *  - flush_icache_range(start, end) flushes (invalidates) a range for icache
+ *  - flush_icache_page(vma, pg) flushes (invalidates) a page for icache
+ *  - flush_cache_sigtramp(vaddr) flushes the signal trampoline
  */
-#define p3_cache_init()				do { } while (0)
-#define flush_cache_all()			do { } while (0)
-#define flush_cache_mm(mm)			do { } while (0)
-#define flush_cache_dup_mm(mm)			do { } while (0)
-#define flush_cache_range(vma, start, end)	do { } while (0)
-#define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
-#define flush_dcache_page(page)			do { } while (0)
-#define flush_icache_range(start, end)		do { } while (0)
-#define flush_icache_page(vma,pg)		do { } while (0)
-#define flush_dcache_mmap_lock(mapping)		do { } while (0)
-#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
-#define flush_cache_sigtramp(vaddr)		do { } while (0)
-#define flush_icache_user_range(vma,pg,adr,len)	do { } while (0)
-#define __flush_wback_region(start, size)	do { (void)(start); } while (0)
-#define __flush_purge_region(start, size)	do { (void)(start); } while (0)
-#define __flush_invalidate_region(start, size)	do { (void)(start); } while (0)
-#else
-#include <cpu/cacheflush.h>
+extern void (*local_flush_cache_all)(void *args);
+extern void (*local_flush_cache_mm)(void *args);
+extern void (*local_flush_cache_dup_mm)(void *args);
+extern void (*local_flush_cache_page)(void *args);
+extern void (*local_flush_cache_range)(void *args);
+extern void (*local_flush_dcache_page)(void *args);
+extern void (*local_flush_icache_range)(void *args);
+extern void (*local_flush_icache_page)(void *args);
+extern void (*local_flush_cache_sigtramp)(void *args);
 
-/*
- * Consistent DMA requires that the __flush_xxx() primitives must be set
- * for any of the enabled non-coherent caches (most of the UP CPUs),
- * regardless of PIPT or VIPT cache configurations.
- */
+static inline void cache_noop(void *args) { }
 
-/* Flush (write-back only) a region (smaller than a page) */
-extern void __flush_wback_region(void *start, int size);
-/* Flush (write-back & invalidate) a region (smaller than a page) */
-extern void __flush_purge_region(void *start, int size);
-/* Flush (invalidate only) a region (smaller than a page) */
-extern void __flush_invalidate_region(void *start, int size);
-#endif
+extern void (*__flush_wback_region)(void *start, int size);
+extern void (*__flush_purge_region)(void *start, int size);
+extern void (*__flush_invalidate_region)(void *start, int size);
+
+extern void flush_cache_all(void);
+extern void flush_cache_mm(struct mm_struct *mm);
+extern void flush_cache_dup_mm(struct mm_struct *mm);
+extern void flush_cache_page(struct vm_area_struct *vma,
+				unsigned long addr, unsigned long pfn);
+extern void flush_cache_range(struct vm_area_struct *vma,
+				 unsigned long start, unsigned long end);
+extern void flush_dcache_page(struct page *page);
+extern void flush_icache_range(unsigned long start, unsigned long end);
+extern void flush_icache_page(struct vm_area_struct *vma,
+				 struct page *page);
+extern void flush_cache_sigtramp(unsigned long address);
+
+struct flusher_data {
+	struct vm_area_struct *vma;
+	unsigned long addr1, addr2;
+};
+
+#define ARCH_HAS_FLUSH_ANON_PAGE
+extern void __flush_anon_page(struct page *page, unsigned long);
+
+static inline void flush_anon_page(struct vm_area_struct *vma,
+				   struct page *page, unsigned long vmaddr)
+{
+	if (boot_cpu_data.dcache.n_aliases && PageAnon(page))
+		__flush_anon_page(page, vmaddr);
+}
 
 #define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
 static inline void flush_kernel_dcache_page(struct page *page)
@@ -49,7 +69,6 @@
 	flush_dcache_page(page);
 }
 
-#if defined(CONFIG_CPU_SH4) && !defined(CONFIG_CACHE_OFF)
 extern void copy_to_user_page(struct vm_area_struct *vma,
 	struct page *page, unsigned long vaddr, void *dst, const void *src,
 	unsigned long len);
@@ -57,23 +76,20 @@
 extern void copy_from_user_page(struct vm_area_struct *vma,
 	struct page *page, unsigned long vaddr, void *dst, const void *src,
 	unsigned long len);
-#else
-#define copy_to_user_page(vma, page, vaddr, dst, src, len)	\
-	do {							\
-		flush_cache_page(vma, vaddr, page_to_pfn(page));\
-		memcpy(dst, src, len);				\
-		flush_icache_user_range(vma, page, vaddr, len);	\
-	} while (0)
-
-#define copy_from_user_page(vma, page, vaddr, dst, src, len)	\
-	do {							\
-		flush_cache_page(vma, vaddr, page_to_pfn(page));\
-		memcpy(dst, src, len);				\
-	} while (0)
-#endif
 
 #define flush_cache_vmap(start, end)		flush_cache_all()
 #define flush_cache_vunmap(start, end)		flush_cache_all()
 
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+
+void kmap_coherent_init(void);
+void *kmap_coherent(struct page *page, unsigned long addr);
+void kunmap_coherent(void);
+
+#define PG_dcache_dirty	PG_arch_1
+
+void cpu_cache_init(void);
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_SH_CACHEFLUSH_H */
diff --git a/arch/sh/include/asm/mmu_context.h b/arch/sh/include/asm/mmu_context.h
index 67d8946..41080b1 100644
--- a/arch/sh/include/asm/mmu_context.h
+++ b/arch/sh/include/asm/mmu_context.h
@@ -69,7 +69,7 @@
 		 * We exhaust ASID of this version.
 		 * Flush all TLB and start new cycle.
 		 */
-		flush_tlb_all();
+		local_flush_tlb_all();
 
 #ifdef CONFIG_SUPERH64
 		/*
diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h
index 49592c7..81bffc0 100644
--- a/arch/sh/include/asm/page.h
+++ b/arch/sh/include/asm/page.h
@@ -50,26 +50,24 @@
 extern unsigned long max_low_pfn, min_low_pfn;
 extern unsigned long memory_start, memory_end;
 
-extern void clear_page(void *to);
+static inline unsigned long
+pages_do_alias(unsigned long addr1, unsigned long addr2)
+{
+	return (addr1 ^ addr2) & shm_align_mask;
+}
+
+
+#define clear_page(page)	memset((void *)(page), 0, PAGE_SIZE)
 extern void copy_page(void *to, void *from);
 
-#if !defined(CONFIG_CACHE_OFF) && defined(CONFIG_MMU) && \
-	(defined(CONFIG_CPU_SH5) || defined(CONFIG_CPU_SH4) || \
-	 defined(CONFIG_SH7705_CACHE_32KB))
 struct page;
 struct vm_area_struct;
-extern void clear_user_page(void *to, unsigned long address, struct page *page);
-extern void copy_user_page(void *to, void *from, unsigned long address,
-			   struct page *page);
-#if defined(CONFIG_CPU_SH4)
+
 extern void copy_user_highpage(struct page *to, struct page *from,
 			       unsigned long vaddr, struct vm_area_struct *vma);
 #define __HAVE_ARCH_COPY_USER_HIGHPAGE
-#endif
-#else
-#define clear_user_page(page, vaddr, pg)	clear_page(page)
-#define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
-#endif
+extern void clear_user_highpage(struct page *page, unsigned long vaddr);
+#define clear_user_highpage	clear_user_highpage
 
 /*
  * These are used to make use of C type-checking..
diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h
index 2a011b1..4f3efa7 100644
--- a/arch/sh/include/asm/pgtable.h
+++ b/arch/sh/include/asm/pgtable.h
@@ -36,6 +36,12 @@
 #define	NEFF_SIGN	(1LL << (NEFF - 1))
 #define	NEFF_MASK	(-1LL << NEFF)
 
+static inline unsigned long long neff_sign_extend(unsigned long val)
+{
+	unsigned long long extended = val;
+	return (extended & NEFF_SIGN) ? (extended | NEFF_MASK) : extended;
+}
+
 #ifdef CONFIG_29BIT
 #define NPHYS		29
 #else
@@ -133,27 +139,25 @@
  */
 #define pgtable_cache_init()	do { } while (0)
 
-#if !defined(CONFIG_CACHE_OFF) && (defined(CONFIG_CPU_SH4) || \
-	defined(CONFIG_SH7705_CACHE_32KB))
-struct mm_struct;
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
-#endif
-
 struct vm_area_struct;
-extern void update_mmu_cache(struct vm_area_struct * vma,
-			     unsigned long address, pte_t pte);
+
+extern void __update_cache(struct vm_area_struct *vma,
+			   unsigned long address, pte_t pte);
+extern void __update_tlb(struct vm_area_struct *vma,
+			 unsigned long address, pte_t pte);
+
+static inline void
+update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
+{
+	__update_cache(vma, address, pte);
+	__update_tlb(vma, address, pte);
+}
+
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 extern void paging_init(void);
 extern void page_table_range_init(unsigned long start, unsigned long end,
 				  pgd_t *pgd);
 
-#if !defined(CONFIG_CACHE_OFF) && defined(CONFIG_CPU_SH4) && defined(CONFIG_MMU)
-extern void kmap_coherent_init(void);
-#else
-#define kmap_coherent_init()	do { } while (0)
-#endif
-
 /* arch/sh/mm/mmap.c */
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h
index 9d87868..017e0c1 100644
--- a/arch/sh/include/asm/processor.h
+++ b/arch/sh/include/asm/processor.h
@@ -44,6 +44,17 @@
 	CPU_SH_NONE
 };
 
+enum cpu_family {
+	CPU_FAMILY_SH2,
+	CPU_FAMILY_SH2A,
+	CPU_FAMILY_SH3,
+	CPU_FAMILY_SH4,
+	CPU_FAMILY_SH4A,
+	CPU_FAMILY_SH4AL_DSP,
+	CPU_FAMILY_SH5,
+	CPU_FAMILY_UNKNOWN,
+};
+
 /*
  * TLB information structure
  *
@@ -61,7 +72,7 @@
 };
 
 struct sh_cpuinfo {
-	unsigned int type;
+	unsigned int type, family;
 	int cut_major, cut_minor;
 	unsigned long loops_per_jiffy;
 	unsigned long asid_cache;
diff --git a/arch/sh/include/asm/system.h b/arch/sh/include/asm/system.h
index f9e2ceb..6b27223 100644
--- a/arch/sh/include/asm/system.h
+++ b/arch/sh/include/asm/system.h
@@ -14,18 +14,6 @@
 
 #define AT_VECTOR_SIZE_ARCH 5 /* entries in ARCH_DLINFO */
 
-#if defined(CONFIG_CPU_SH4A) || defined(CONFIG_CPU_SH5)
-#define __icbi()			\
-{					\
-	unsigned long __addr;		\
-	__addr = 0xa8000000;		\
-	__asm__ __volatile__(		\
-		"icbi   %0\n\t"		\
-		: /* no output */	\
-		: "m" (__m(__addr)));	\
-}
-#endif
-
 /*
  * A brief note on ctrl_barrier(), the control register write barrier.
  *
@@ -44,7 +32,7 @@
 #define mb()		__asm__ __volatile__ ("synco": : :"memory")
 #define rmb()		mb()
 #define wmb()		__asm__ __volatile__ ("synco": : :"memory")
-#define ctrl_barrier()	__icbi()
+#define ctrl_barrier()	__icbi(0xa8000000)
 #define read_barrier_depends()	do { } while(0)
 #else
 #define mb()		__asm__ __volatile__ ("": : :"memory")
diff --git a/arch/sh/include/asm/system_32.h b/arch/sh/include/asm/system_32.h
index d3ab269..607d413 100644
--- a/arch/sh/include/asm/system_32.h
+++ b/arch/sh/include/asm/system_32.h
@@ -63,6 +63,16 @@
 #define __restore_dsp(tsk)	do { } while (0)
 #endif
 
+#if defined(CONFIG_CPU_SH4A)
+#define __icbi(addr)	__asm__ __volatile__ ( "icbi @%0\n\t" : : "r" (addr))
+#else
+#define __icbi(addr)	mb()
+#endif
+
+#define __ocbp(addr)	__asm__ __volatile__ ( "ocbp @%0\n\t" : : "r" (addr))
+#define __ocbi(addr)	__asm__ __volatile__ ( "ocbi @%0\n\t" : : "r" (addr))
+#define __ocbwb(addr)	__asm__ __volatile__ ( "ocbwb @%0\n\t" : : "r" (addr))
+
 struct task_struct *__switch_to(struct task_struct *prev,
 				struct task_struct *next);
 
@@ -198,6 +208,11 @@
 })
 #endif
 
+static inline reg_size_t register_align(void *val)
+{
+	return (unsigned long)(signed long)val;
+}
+
 int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
 			    struct mem_access *ma, int);
 
diff --git a/arch/sh/include/asm/system_64.h b/arch/sh/include/asm/system_64.h
index 943acf5..8e4a03e 100644
--- a/arch/sh/include/asm/system_64.h
+++ b/arch/sh/include/asm/system_64.h
@@ -37,4 +37,14 @@
 #define jump_to_uncached()	do { } while (0)
 #define back_to_cached()	do { } while (0)
 
+#define __icbi(addr)	__asm__ __volatile__ ( "icbi %0, 0\n\t" : : "r" (addr))
+#define __ocbp(addr)	__asm__ __volatile__ ( "ocbp %0, 0\n\t" : : "r" (addr))
+#define __ocbi(addr)	__asm__ __volatile__ ( "ocbi %0, 0\n\t" : : "r" (addr))
+#define __ocbwb(addr)	__asm__ __volatile__ ( "ocbwb %0, 0\n\t" : : "r" (addr))
+
+static inline reg_size_t register_align(void *val)
+{
+	return (unsigned long long)(signed long long)(signed long)val;
+}
+
 #endif /* __ASM_SH_SYSTEM_64_H */
diff --git a/arch/sh/include/asm/types.h b/arch/sh/include/asm/types.h
index c7f3c94..f8421f7 100644
--- a/arch/sh/include/asm/types.h
+++ b/arch/sh/include/asm/types.h
@@ -11,8 +11,10 @@
 
 #ifdef CONFIG_SUPERH32
 typedef u16 insn_size_t;
+typedef u32 reg_size_t;
 #else
 typedef u32 insn_size_t;
+typedef u64 reg_size_t;
 #endif
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/sh/include/cpu-common/cpu/cacheflush.h b/arch/sh/include/cpu-common/cpu/cacheflush.h
deleted file mode 100644
index c3db00b..0000000
--- a/arch/sh/include/cpu-common/cpu/cacheflush.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * include/asm-sh/cpu-sh2/cacheflush.h
- *
- * Copyright (C) 2003 Paul Mundt
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#ifndef __ASM_CPU_SH2_CACHEFLUSH_H
-#define __ASM_CPU_SH2_CACHEFLUSH_H
-
-/*
- * Cache flushing:
- *
- *  - flush_cache_all() flushes entire cache
- *  - flush_cache_mm(mm) flushes the specified mm context's cache lines
- *  - flush_cache_dup mm(mm) handles cache flushing when forking
- *  - flush_cache_page(mm, vmaddr, pfn) flushes a single page
- *  - flush_cache_range(vma, start, end) flushes a range of pages
- *
- *  - flush_dcache_page(pg) flushes(wback&invalidates) a page for dcache
- *  - flush_icache_range(start, end) flushes(invalidates) a range for icache
- *  - flush_icache_page(vma, pg) flushes(invalidates) a page for icache
- *
- *  Caches are indexed (effectively) by physical address on SH-2, so
- *  we don't need them.
- */
-#define flush_cache_all()			do { } while (0)
-#define flush_cache_mm(mm)			do { } while (0)
-#define flush_cache_dup_mm(mm)			do { } while (0)
-#define flush_cache_range(vma, start, end)	do { } while (0)
-#define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
-#define flush_dcache_page(page)			do { } while (0)
-#define flush_dcache_mmap_lock(mapping)		do { } while (0)
-#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
-#define flush_icache_range(start, end)		do { } while (0)
-#define flush_icache_page(vma,pg)		do { } while (0)
-#define flush_icache_user_range(vma,pg,adr,len)	do { } while (0)
-#define flush_cache_sigtramp(vaddr)		do { } while (0)
-
-#define p3_cache_init()				do { } while (0)
-
-#endif /* __ASM_CPU_SH2_CACHEFLUSH_H */
diff --git a/arch/sh/include/cpu-sh2a/cpu/cacheflush.h b/arch/sh/include/cpu-sh2a/cpu/cacheflush.h
deleted file mode 100644
index 3d3b920..0000000
--- a/arch/sh/include/cpu-sh2a/cpu/cacheflush.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef __ASM_CPU_SH2A_CACHEFLUSH_H
-#define __ASM_CPU_SH2A_CACHEFLUSH_H
-
-/* 
- * Cache flushing:
- *
- *  - flush_cache_all() flushes entire cache
- *  - flush_cache_mm(mm) flushes the specified mm context's cache lines
- *  - flush_cache_dup mm(mm) handles cache flushing when forking
- *  - flush_cache_page(mm, vmaddr, pfn) flushes a single page
- *  - flush_cache_range(vma, start, end) flushes a range of pages
- *
- *  - flush_dcache_page(pg) flushes(wback&invalidates) a page for dcache
- *  - flush_icache_range(start, end) flushes(invalidates) a range for icache
- *  - flush_icache_page(vma, pg) flushes(invalidates) a page for icache
- *
- *  Caches are indexed (effectively) by physical address on SH-2, so
- *  we don't need them.
- */
-#define flush_cache_all()			do { } while (0)
-#define flush_cache_mm(mm)			do { } while (0)
-#define flush_cache_dup_mm(mm)			do { } while (0)
-#define flush_cache_range(vma, start, end)	do { } while (0)
-#define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
-#define flush_dcache_page(page)			do { } while (0)
-#define flush_dcache_mmap_lock(mapping)		do { } while (0)
-#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
-void flush_icache_range(unsigned long start, unsigned long end);
-#define flush_icache_page(vma,pg)		do { } while (0)
-#define flush_icache_user_range(vma,pg,adr,len)	do { } while (0)
-#define flush_cache_sigtramp(vaddr)		do { } while (0)
-
-#define p3_cache_init()				do { } while (0)
-#endif /* __ASM_CPU_SH2A_CACHEFLUSH_H */
diff --git a/arch/sh/include/cpu-sh3/cpu/cacheflush.h b/arch/sh/include/cpu-sh3/cpu/cacheflush.h
deleted file mode 100644
index 1ac27aa..0000000
--- a/arch/sh/include/cpu-sh3/cpu/cacheflush.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * include/asm-sh/cpu-sh3/cacheflush.h
- *
- * Copyright (C) 1999 Niibe Yutaka
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#ifndef __ASM_CPU_SH3_CACHEFLUSH_H
-#define __ASM_CPU_SH3_CACHEFLUSH_H
-
-#if defined(CONFIG_SH7705_CACHE_32KB)
-/* SH7705 is an SH3 processor with 32KB cache. This has alias issues like the
- * SH4. Unlike the SH4 this is a unified cache so we need to do some work
- * in mmap when 'exec'ing a new binary
- */
- /* 32KB cache, 4kb PAGE sizes need to check bit 12 */
-#define CACHE_ALIAS 0x00001000
-
-#define PG_mapped	PG_arch_1
-
-void flush_cache_all(void);
-void flush_cache_mm(struct mm_struct *mm);
-#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
-                              unsigned long end);
-void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn);
-void flush_dcache_page(struct page *pg);
-void flush_icache_range(unsigned long start, unsigned long end);
-void flush_icache_page(struct vm_area_struct *vma, struct page *page);
-
-#define flush_dcache_mmap_lock(mapping)		do { } while (0)
-#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
-
-/* SH3 has unified cache so no special action needed here */
-#define flush_cache_sigtramp(vaddr)		do { } while (0)
-#define flush_icache_user_range(vma,pg,adr,len)	do { } while (0)
-
-#define p3_cache_init()				do { } while (0)
-
-#else
-#include <cpu-common/cpu/cacheflush.h>
-#endif
-
-#endif /* __ASM_CPU_SH3_CACHEFLUSH_H */
diff --git a/arch/sh/include/cpu-sh4/cpu/cacheflush.h b/arch/sh/include/cpu-sh4/cpu/cacheflush.h
deleted file mode 100644
index 065306d..0000000
--- a/arch/sh/include/cpu-sh4/cpu/cacheflush.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * include/asm-sh/cpu-sh4/cacheflush.h
- *
- * Copyright (C) 1999 Niibe Yutaka
- * Copyright (C) 2003 Paul Mundt
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#ifndef __ASM_CPU_SH4_CACHEFLUSH_H
-#define __ASM_CPU_SH4_CACHEFLUSH_H
-
-/*
- *  Caches are broken on SH-4 (unless we use write-through
- *  caching; in which case they're only semi-broken),
- *  so we need them.
- */
-void flush_cache_all(void);
-void flush_dcache_all(void);
-void flush_cache_mm(struct mm_struct *mm);
-#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
-		       unsigned long end);
-void flush_cache_page(struct vm_area_struct *vma, unsigned long addr,
-		      unsigned long pfn);
-void flush_dcache_page(struct page *pg);
-
-#define flush_dcache_mmap_lock(mapping)		do { } while (0)
-#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
-
-void flush_icache_range(unsigned long start, unsigned long end);
-void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
-			     unsigned long addr, int len);
-
-#define flush_icache_page(vma,pg)		do { } while (0)
-
-/* Initialization of P3 area for copy_user_page */
-void p3_cache_init(void);
-
-#define PG_mapped	PG_arch_1
-
-#endif /* __ASM_CPU_SH4_CACHEFLUSH_H */
diff --git a/arch/sh/include/cpu-sh5/cpu/cacheflush.h b/arch/sh/include/cpu-sh5/cpu/cacheflush.h
deleted file mode 100644
index 5a11f0b..0000000
--- a/arch/sh/include/cpu-sh5/cpu/cacheflush.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef __ASM_SH_CPU_SH5_CACHEFLUSH_H
-#define __ASM_SH_CPU_SH5_CACHEFLUSH_H
-
-#ifndef __ASSEMBLY__
-
-struct vm_area_struct;
-struct page;
-struct mm_struct;
-
-extern void flush_cache_all(void);
-extern void flush_cache_mm(struct mm_struct *mm);
-extern void flush_cache_sigtramp(unsigned long vaddr);
-extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
-			      unsigned long end);
-extern void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn);
-extern void flush_dcache_page(struct page *pg);
-extern void flush_icache_range(unsigned long start, unsigned long end);
-extern void flush_icache_user_range(struct vm_area_struct *vma,
-				    struct page *page, unsigned long addr,
-				    int len);
-
-#define flush_cache_dup_mm(mm)	flush_cache_mm(mm)
-
-#define flush_dcache_mmap_lock(mapping)		do { } while (0)
-#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
-
-#define flush_icache_page(vma, page)	do { } while (0)
-void p3_cache_init(void);
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* __ASM_SH_CPU_SH5_CACHEFLUSH_H */
-
diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c
index d40b9db..e932ebe 100644
--- a/arch/sh/kernel/cpu/init.c
+++ b/arch/sh/kernel/cpu/init.c
@@ -299,11 +299,9 @@
 	cache_init();
 
 	if (raw_smp_processor_id() == 0) {
-#ifdef CONFIG_MMU
 		shm_align_mask = max_t(unsigned long,
 				       current_cpu_data.dcache.way_size - 1,
 				       PAGE_SIZE - 1);
-#endif
 
 		/* Boot CPU sets the cache shape */
 		detect_cache_shape();
diff --git a/arch/sh/kernel/cpu/sh2/probe.c b/arch/sh/kernel/cpu/sh2/probe.c
index 5916d90..1db6d888 100644
--- a/arch/sh/kernel/cpu/sh2/probe.c
+++ b/arch/sh/kernel/cpu/sh2/probe.c
@@ -29,6 +29,7 @@
 	 */
 	boot_cpu_data.dcache.flags |= SH_CACHE_COMBINED;
 	boot_cpu_data.icache = boot_cpu_data.dcache;
+	boot_cpu_data.family = CPU_FAMILY_SH2;
 
 	return 0;
 }
diff --git a/arch/sh/kernel/cpu/sh2a/probe.c b/arch/sh/kernel/cpu/sh2a/probe.c
index e098e2f..6825d65 100644
--- a/arch/sh/kernel/cpu/sh2a/probe.c
+++ b/arch/sh/kernel/cpu/sh2a/probe.c
@@ -15,6 +15,8 @@
 
 int __init detect_cpu_and_cache_system(void)
 {
+	boot_cpu_data.family			= CPU_FAMILY_SH2A;
+
 	/* All SH-2A CPUs have support for 16 and 32-bit opcodes.. */
 	boot_cpu_data.flags			|= CPU_HAS_OP32;
 
diff --git a/arch/sh/kernel/cpu/sh3/entry.S b/arch/sh/kernel/cpu/sh3/entry.S
index 9421ec7..aebd33d 100644
--- a/arch/sh/kernel/cpu/sh3/entry.S
+++ b/arch/sh/kernel/cpu/sh3/entry.S
@@ -113,35 +113,34 @@
 #if defined(CONFIG_MMU)
 	.align	2
 ENTRY(tlb_miss_load)
-	bra	call_dpf
+	bra	call_handle_tlbmiss
 	 mov	#0, r5
 
 	.align	2
 ENTRY(tlb_miss_store)
-	bra	call_dpf
+	bra	call_handle_tlbmiss
 	 mov	#1, r5
 
 	.align	2
 ENTRY(initial_page_write)
-	bra	call_dpf
-	 mov	#1, r5
+	bra	call_handle_tlbmiss
+	 mov	#2, r5
 
 	.align	2
 ENTRY(tlb_protection_violation_load)
-	bra	call_dpf
+	bra	call_do_page_fault
 	 mov	#0, r5
 
 	.align	2
 ENTRY(tlb_protection_violation_store)
-	bra	call_dpf
+	bra	call_do_page_fault
 	 mov	#1, r5
 
-call_dpf:
+call_handle_tlbmiss:
 	setup_frame_reg
 	mov.l	1f, r0
 	mov	r5, r8
 	mov.l	@r0, r6
-	mov	r6, r9
 	mov.l	2f, r0
 	sts	pr, r10
 	jsr	@r0
@@ -152,16 +151,25 @@
 	 lds	r10, pr
 	rts
 	 nop
-0:	mov.l	3f, r0
-	mov	r9, r6
+0:
 	mov	r8, r5
+call_do_page_fault:
+	mov.l	1f, r0
+	mov.l	@r0, r6
+
+	sti
+
+	mov.l	3f, r0
+	mov.l	4f, r1
+	mov	r15, r4
 	jmp	@r0
-	 mov	r15, r4
+	 lds	r1, pr
 
 	.align 2
 1:	.long	MMU_TEA
-2:	.long	__do_page_fault
+2:	.long	handle_tlbmiss
 3:	.long	do_page_fault
+4:	.long	ret_from_exception
 
 	.align	2
 ENTRY(address_error_load)
diff --git a/arch/sh/kernel/cpu/sh3/probe.c b/arch/sh/kernel/cpu/sh3/probe.c
index 10f2a76..f9c7df6 100644
--- a/arch/sh/kernel/cpu/sh3/probe.c
+++ b/arch/sh/kernel/cpu/sh3/probe.c
@@ -107,5 +107,7 @@
 	boot_cpu_data.dcache.flags |= SH_CACHE_COMBINED;
 	boot_cpu_data.icache = boot_cpu_data.dcache;
 
+	boot_cpu_data.family = CPU_FAMILY_SH3;
+
 	return 0;
 }
diff --git a/arch/sh/kernel/cpu/sh4/probe.c b/arch/sh/kernel/cpu/sh4/probe.c
index afd3e73..d36f0c4 100644
--- a/arch/sh/kernel/cpu/sh4/probe.c
+++ b/arch/sh/kernel/cpu/sh4/probe.c
@@ -57,8 +57,12 @@
 	 * Setup some generic flags we can probe on SH-4A parts
 	 */
 	if (((pvr >> 16) & 0xff) == 0x10) {
-		if ((cvr & 0x10000000) == 0)
+		boot_cpu_data.family = CPU_FAMILY_SH4A;
+
+		if ((cvr & 0x10000000) == 0) {
 			boot_cpu_data.flags |= CPU_HAS_DSP;
+			boot_cpu_data.family = CPU_FAMILY_SH4AL_DSP;
+		}
 
 		boot_cpu_data.flags |= CPU_HAS_LLSC | CPU_HAS_PERF_COUNTER;
 		boot_cpu_data.cut_major = pvr & 0x7f;
@@ -68,6 +72,7 @@
 	} else {
 		/* And some SH-4 defaults.. */
 		boot_cpu_data.flags |= CPU_HAS_PTEA;
+		boot_cpu_data.family = CPU_FAMILY_SH4;
 	}
 
 	/* FPU detection works for everyone */
@@ -180,9 +185,6 @@
 		boot_cpu_data.dcache.ways = 2;
 
 		break;
-	default:
-		boot_cpu_data.type = CPU_SH_NONE;
-		break;
 	}
 
 	/*
diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
index 2b6b0d5..185ec39 100644
--- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
@@ -57,6 +57,8 @@
 {
 	int i;
 
+	local_timer_setup(0);
+
 	BUILD_BUG_ON(SMP_MSG_NR >= 8);
 
 	for (i = 0; i < SMP_MSG_NR; i++)
diff --git a/arch/sh/kernel/cpu/sh5/probe.c b/arch/sh/kernel/cpu/sh5/probe.c
index 92ad844..521d05b 100644
--- a/arch/sh/kernel/cpu/sh5/probe.c
+++ b/arch/sh/kernel/cpu/sh5/probe.c
@@ -34,6 +34,8 @@
 		/* CPU.VCR aliased at CIR address on SH5-101 */
 		boot_cpu_data.type = CPU_SH5_101;
 
+	boot_cpu_data.family = CPU_FAMILY_SH5;
+
 	/*
 	 * First, setup some sane values for the I-cache.
 	 */
diff --git a/arch/sh/kernel/localtimer.c b/arch/sh/kernel/localtimer.c
index 96e8eae..0b04e7d 100644
--- a/arch/sh/kernel/localtimer.c
+++ b/arch/sh/kernel/localtimer.c
@@ -22,6 +22,7 @@
 #include <linux/jiffies.h>
 #include <linux/percpu.h>
 #include <linux/clockchips.h>
+#include <linux/hardirq.h>
 #include <linux/irq.h>
 
 static DEFINE_PER_CPU(struct clock_event_device, local_clockevent);
@@ -33,7 +34,9 @@
 {
 	struct clock_event_device *clk = &__get_cpu_var(local_clockevent);
 
+	irq_enter();
 	clk->event_handler(clk);
+	irq_exit();
 }
 
 static void dummy_timer_set_mode(enum clock_event_mode mode,
@@ -46,8 +49,10 @@
 	struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
 
 	clk->name		= "dummy_timer";
-	clk->features		= CLOCK_EVT_FEAT_DUMMY;
-	clk->rating		= 200;
+	clk->features		= CLOCK_EVT_FEAT_ONESHOT |
+				  CLOCK_EVT_FEAT_PERIODIC |
+				  CLOCK_EVT_FEAT_DUMMY;
+	clk->rating		= 400;
 	clk->mult		= 1;
 	clk->set_mode		= dummy_timer_set_mode;
 	clk->broadcast		= smp_timer_broadcast;
diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index 24de742..1192398 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c
@@ -425,7 +425,6 @@
 		struct task_struct *p, struct pt_regs *regs)
 {
 	struct pt_regs *childregs;
-	unsigned long long se;			/* Sign extension */
 
 #ifdef CONFIG_SH_FPU
 	if(last_task_used_math == current) {
@@ -441,11 +440,19 @@
 
 	*childregs = *regs;
 
+	/*
+	 * Sign extend the edited stack.
+	 * Note that thread.pc and thread.sp will stay
+	 * 32-bit wide and context switch must take care
+	 * of NEFF sign extension.
+	 */
 	if (user_mode(regs)) {
-		childregs->regs[15] = usp;
+		childregs->regs[15] = neff_sign_extend(usp);
 		p->thread.uregs = childregs;
 	} else {
-		childregs->regs[15] = (unsigned long)task_stack_page(p) + THREAD_SIZE;
+		childregs->regs[15] =
+			neff_sign_extend((unsigned long)task_stack_page(p) +
+					 THREAD_SIZE);
 	}
 
 	childregs->regs[9] = 0; /* Set return value for child */
@@ -454,17 +461,6 @@
 	p->thread.sp = (unsigned long) childregs;
 	p->thread.pc = (unsigned long) ret_from_fork;
 
-	/*
-	 * Sign extend the edited stack.
-         * Note that thread.pc and thread.pc will stay
-	 * 32-bit wide and context switch must take care
-	 * of NEFF sign extension.
-	 */
-
-	se = childregs->regs[15];
-	se = (se & NEFF_SIGN) ? (se | NEFF_MASK) : se;
-	childregs->regs[15] = se;
-
 	return 0;
 }
 
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index d13bbaf..f9d44f8 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -49,6 +49,7 @@
 struct sh_cpuinfo cpu_data[NR_CPUS] __read_mostly = {
 	[0] = {
 		.type			= CPU_SH_NONE,
+		.family			= CPU_FAMILY_UNKNOWN,
 		.loops_per_jiffy	= 10000000,
 	},
 };
diff --git a/arch/sh/kernel/sh_ksyms_32.c b/arch/sh/kernel/sh_ksyms_32.c
index cec6108..8dbe26b 100644
--- a/arch/sh/kernel/sh_ksyms_32.c
+++ b/arch/sh/kernel/sh_ksyms_32.c
@@ -101,11 +101,6 @@
 EXPORT_SYMBOL(flush_dcache_page);
 #endif
 
-#if !defined(CONFIG_CACHE_OFF) && defined(CONFIG_MMU) && \
-	(defined(CONFIG_CPU_SH4) || defined(CONFIG_SH7705_CACHE_32KB))
-EXPORT_SYMBOL(clear_user_page);
-#endif
-
 #ifdef CONFIG_MCOUNT
 DECLARE_EXPORT(mcount);
 #endif
@@ -114,7 +109,6 @@
 #ifdef CONFIG_IPV6
 EXPORT_SYMBOL(csum_ipv6_magic);
 #endif
-EXPORT_SYMBOL(clear_page);
 EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(_ebss);
diff --git a/arch/sh/kernel/sh_ksyms_64.c b/arch/sh/kernel/sh_ksyms_64.c
index f5bd156..d008e17 100644
--- a/arch/sh/kernel/sh_ksyms_64.c
+++ b/arch/sh/kernel/sh_ksyms_64.c
@@ -30,14 +30,6 @@
 EXPORT_SYMBOL(dump_fpu);
 EXPORT_SYMBOL(kernel_thread);
 
-#if !defined(CONFIG_CACHE_OFF) && defined(CONFIG_MMU)
-EXPORT_SYMBOL(clear_user_page);
-#endif
-
-#ifndef CONFIG_CACHE_OFF
-EXPORT_SYMBOL(flush_dcache_page);
-#endif
-
 #ifdef CONFIG_VT
 EXPORT_SYMBOL(screen_info);
 #endif
@@ -52,7 +44,6 @@
 EXPORT_SYMBOL(__get_user_asm_q);
 EXPORT_SYMBOL(__strnlen_user);
 EXPORT_SYMBOL(__strncpy_from_user);
-EXPORT_SYMBOL(clear_page);
 EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(__copy_user);
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index 0663a0e..026fd1c 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -561,13 +561,11 @@
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
 	if (ka->sa.sa_flags & SA_RESTORER) {
-		DEREF_REG_PR = (unsigned long) ka->sa.sa_restorer | 0x1;
-
 		/*
 		 * On SH5 all edited pointers are subject to NEFF
 		 */
-		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
-			(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+		DEREF_REG_PR = neff_sign_extend((unsigned long)
+			ka->sa.sa_restorer | 0x1);
 	} else {
 		/*
 		 * Different approach on SH5.
@@ -580,9 +578,8 @@
 		 * . being code, linker turns ShMedia bit on, always
 		 *   dereference index -1.
 		 */
-		DEREF_REG_PR = (unsigned long) frame->retcode | 0x01;
-		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
-			(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+		DEREF_REG_PR = neff_sign_extend((unsigned long)
+			frame->retcode | 0x01);
 
 		if (__copy_to_user(frame->retcode,
 			(void *)((unsigned long)sa_default_restorer & (~1)), 16) != 0)
@@ -596,9 +593,7 @@
 	 * Set up registers for signal handler.
 	 * All edited pointers are subject to NEFF.
 	 */
-	regs->regs[REG_SP] = (unsigned long) frame;
-	regs->regs[REG_SP] = (regs->regs[REG_SP] & NEFF_SIGN) ?
-		 (regs->regs[REG_SP] | NEFF_MASK) : regs->regs[REG_SP];
+	regs->regs[REG_SP] = neff_sign_extend((unsigned long)frame);
 	regs->regs[REG_ARG1] = signal; /* Arg for signal handler */
 
         /* FIXME:
@@ -613,8 +608,7 @@
 	regs->regs[REG_ARG2] = (unsigned long long)(unsigned long)(signed long)&frame->sc;
 	regs->regs[REG_ARG3] = (unsigned long long)(unsigned long)(signed long)&frame->sc;
 
-	regs->pc = (unsigned long) ka->sa.sa_handler;
-	regs->pc = (regs->pc & NEFF_SIGN) ? (regs->pc | NEFF_MASK) : regs->pc;
+	regs->pc = neff_sign_extend((unsigned long)ka->sa.sa_handler);
 
 	set_fs(USER_DS);
 
@@ -676,13 +670,11 @@
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
 	if (ka->sa.sa_flags & SA_RESTORER) {
-		DEREF_REG_PR = (unsigned long) ka->sa.sa_restorer | 0x1;
-
 		/*
 		 * On SH5 all edited pointers are subject to NEFF
 		 */
-		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
-			(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+		DEREF_REG_PR = neff_sign_extend((unsigned long)
+			ka->sa.sa_restorer | 0x1);
 	} else {
 		/*
 		 * Different approach on SH5.
@@ -695,15 +687,14 @@
 		 * . being code, linker turns ShMedia bit on, always
 		 *   dereference index -1.
 		 */
-
-		DEREF_REG_PR = (unsigned long) frame->retcode | 0x01;
-		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
-			(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+		DEREF_REG_PR = neff_sign_extend((unsigned long)
+			frame->retcode | 0x01);
 
 		if (__copy_to_user(frame->retcode,
 			(void *)((unsigned long)sa_default_rt_restorer & (~1)), 16) != 0)
 			goto give_sigsegv;
 
+		/* Cohere the trampoline with the I-cache. */
 		flush_icache_range(DEREF_REG_PR-1, DEREF_REG_PR-1+15);
 	}
 
@@ -711,14 +702,11 @@
 	 * Set up registers for signal handler.
 	 * All edited pointers are subject to NEFF.
 	 */
-	regs->regs[REG_SP] = (unsigned long) frame;
-	regs->regs[REG_SP] = (regs->regs[REG_SP] & NEFF_SIGN) ?
-		 (regs->regs[REG_SP] | NEFF_MASK) : regs->regs[REG_SP];
+	regs->regs[REG_SP] = neff_sign_extend((unsigned long)frame);
 	regs->regs[REG_ARG1] = signal; /* Arg for signal handler */
 	regs->regs[REG_ARG2] = (unsigned long long)(unsigned long)(signed long)&frame->info;
 	regs->regs[REG_ARG3] = (unsigned long long)(unsigned long)(signed long)&frame->uc.uc_mcontext;
-	regs->pc = (unsigned long) ka->sa.sa_handler;
-	regs->pc = (regs->pc & NEFF_SIGN) ? (regs->pc | NEFF_MASK) : regs->pc;
+	regs->pc = neff_sign_extend((unsigned long)ka->sa.sa_handler);
 
 	set_fs(USER_DS);
 
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index 7f95f47..632aff5 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -119,9 +119,5 @@
 	set_normalized_timespec(&wall_to_monotonic,
 				-xtime.tv_sec, -xtime.tv_nsec);
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-	local_timer_setup(smp_processor_id());
-#endif
-
 	late_time_init = sh_late_time_init;
 }
diff --git a/arch/sh/lib/Makefile b/arch/sh/lib/Makefile
index c2b28d8..a969b47 100644
--- a/arch/sh/lib/Makefile
+++ b/arch/sh/lib/Makefile
@@ -23,7 +23,7 @@
 memcpy-y			:= memcpy.o
 memcpy-$(CONFIG_CPU_SH4)	:= memcpy-sh4.o
 
-lib-$(CONFIG_MMU)		+= copy_page.o clear_page.o
+lib-$(CONFIG_MMU)		+= copy_page.o __clear_user.o
 lib-$(CONFIG_MCOUNT)		+= mcount.o
 lib-y				+= $(memcpy-y) $(udivsi3-y)
 
diff --git a/arch/sh/lib/clear_page.S b/arch/sh/lib/__clear_user.S
similarity index 76%
rename from arch/sh/lib/clear_page.S
rename to arch/sh/lib/__clear_user.S
index c92244d..db1dca7 100644
--- a/arch/sh/lib/clear_page.S
+++ b/arch/sh/lib/__clear_user.S
@@ -8,52 +8,6 @@
 #include <linux/linkage.h>
 #include <asm/page.h>
 
-/*
- * clear_page
- * @to: P1 address
- *
- * void clear_page(void *to)
- */
-
-/*
- * r0 --- scratch
- * r4 --- to
- * r5 --- to + PAGE_SIZE
- */
-ENTRY(clear_page)
-	mov	r4,r5
-	mov.l	.Llimit,r0
-	add	r0,r5
-	mov	#0,r0
-	!
-1:
-#if defined(CONFIG_CPU_SH4)
-	movca.l	r0,@r4
-	mov	r4,r1
-#else
-	mov.l	r0,@r4
-#endif
-	add	#32,r4
-	mov.l	r0,@-r4
-	mov.l	r0,@-r4
-	mov.l	r0,@-r4
-	mov.l	r0,@-r4
-	mov.l	r0,@-r4
-	mov.l	r0,@-r4
-	mov.l	r0,@-r4
-#if defined(CONFIG_CPU_SH4)
-	ocbwb	@r1
-#endif
-	cmp/eq	r5,r4
-	bf/s	1b
-	 add	#28,r4
-	!
-	rts
-	 nop
-
-	.balign 4
-.Llimit:	.long	(PAGE_SIZE-28)
-
 ENTRY(__clear_user)
 	!
 	mov	#0, r0
diff --git a/arch/sh/lib64/Makefile b/arch/sh/lib64/Makefile
index 334bb2d..1fee75a 100644
--- a/arch/sh/lib64/Makefile
+++ b/arch/sh/lib64/Makefile
@@ -11,7 +11,7 @@
 
 # Panic should really be compiled as PIC
 lib-y  := udelay.o dbg.o panic.o memcpy.o memset.o \
-	  copy_user_memcpy.o copy_page.o clear_page.o strcpy.o strlen.o
+	  copy_user_memcpy.o copy_page.o strcpy.o strlen.o
 
 # Extracted from libgcc
 lib-y	+= udivsi3.o udivdi3.o sdivsi3.o
diff --git a/arch/sh/lib64/clear_page.S b/arch/sh/lib64/clear_page.S
deleted file mode 100644
index 007ab48..0000000
--- a/arch/sh/lib64/clear_page.S
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-   Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
-
-   This file is subject to the terms and conditions of the GNU General Public
-   License.  See the file "COPYING" in the main directory of this archive
-   for more details.
-
-   Tight version of memset for the case of just clearing a page.  It turns out
-   that having the alloco's spaced out slightly due to the increment/branch
-   pair causes them to contend less for access to the cache.  Similarly,
-   keeping the stores apart from the allocos causes less contention.  => Do two
-   separate loops.  Do multiple stores per loop to amortise the
-   increment/branch cost a little.
-
-   Parameters:
-   r2 : source effective address (start of page)
-
-   Always clears 4096 bytes.
-
-   Note : alloco guarded by synco to avoid TAKum03020 erratum
-
-*/
-
-	.section .text..SHmedia32,"ax"
-	.little
-
-	.balign 8
-	.global clear_page
-clear_page:
-	pta/l 1f, tr1
-	pta/l 2f, tr2
-	ptabs/l r18, tr0
-
-	movi 4096, r7
-	add  r2, r7, r7
-	add  r2, r63, r6
-1:
-	alloco r6, 0
-	synco	! TAKum03020
-	addi	r6, 32, r6
-	bgt/l	r7, r6, tr1
-
-	add  r2, r63, r6
-2:
-	st.q  r6,   0, r63
-	st.q  r6,   8, r63
-	st.q  r6,  16, r63
-	st.q  r6,  24, r63
-	addi r6, 32, r6
-	bgt/l r7, r6, tr2
-
-	blink tr0, r63
-
-
diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile
index 9f4bc3d..3759bf8 100644
--- a/arch/sh/mm/Makefile
+++ b/arch/sh/mm/Makefile
@@ -1,5 +1,65 @@
-ifeq ($(CONFIG_SUPERH32),y)
-include ${srctree}/arch/sh/mm/Makefile_32
-else
-include ${srctree}/arch/sh/mm/Makefile_64
+#
+# Makefile for the Linux SuperH-specific parts of the memory manager.
+#
+
+obj-y			:= cache.o init.o consistent.o mmap.o
+
+cacheops-$(CONFIG_CPU_SH2)		:= cache-sh2.o
+cacheops-$(CONFIG_CPU_SH2A)		:= cache-sh2a.o
+cacheops-$(CONFIG_CPU_SH3)		:= cache-sh3.o
+cacheops-$(CONFIG_CPU_SH4)		:= cache-sh4.o flush-sh4.o
+cacheops-$(CONFIG_CPU_SH5)		:= cache-sh5.o flush-sh4.o
+cacheops-$(CONFIG_SH7705_CACHE_32KB)	+= cache-sh7705.o
+
+obj-y			+= $(cacheops-y)
+
+mmu-y			:= nommu.o extable_32.o
+mmu-$(CONFIG_MMU)	:= extable_$(BITS).o fault_$(BITS).o \
+			   ioremap_$(BITS).o kmap.o tlbflush_$(BITS).o
+
+obj-y			+= $(mmu-y)
+obj-$(CONFIG_DEBUG_FS)	+= asids-debugfs.o
+
+ifdef CONFIG_DEBUG_FS
+obj-$(CONFIG_CPU_SH4)	+= cache-debugfs.o
 endif
+
+ifdef CONFIG_MMU
+tlb-$(CONFIG_CPU_SH3)		:= tlb-sh3.o
+tlb-$(CONFIG_CPU_SH4)		:= tlb-sh4.o
+tlb-$(CONFIG_CPU_SH5)		:= tlb-sh5.o
+tlb-$(CONFIG_CPU_HAS_PTEAEX)	:= tlb-pteaex.o
+obj-y				+= $(tlb-y)
+endif
+
+obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
+obj-$(CONFIG_PMB)		+= pmb.o
+obj-$(CONFIG_PMB_FIXED)		+= pmb-fixed.o
+obj-$(CONFIG_NUMA)		+= numa.o
+
+# Special flags for fault_64.o.  This puts restrictions on the number of
+# caller-save registers that the compiler can target when building this file.
+# This is required because the code is called from a context in entry.S where
+# very few registers have been saved in the exception handler (for speed
+# reasons).
+# The caller save registers that have been saved and which can be used are
+# r2,r3,r4,r5 : argument passing
+# r15, r18 : SP and LINK
+# tr0-4 : allow all caller-save TR's.  The compiler seems to be able to make
+#         use of them, so it's probably beneficial to performance to save them
+#         and have them available for it.
+#
+# The resources not listed below are callee save, i.e. the compiler is free to
+# use any of them and will spill them to the stack itself.
+
+CFLAGS_fault_64.o += -ffixed-r7 \
+	-ffixed-r8 -ffixed-r9 -ffixed-r10 -ffixed-r11 -ffixed-r12 \
+	-ffixed-r13 -ffixed-r14 -ffixed-r16 -ffixed-r17 -ffixed-r19 \
+	-ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \
+	-ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \
+	-ffixed-r36 -ffixed-r37 -ffixed-r38 -ffixed-r39 -ffixed-r40 \
+	-ffixed-r41 -ffixed-r42 -ffixed-r43  \
+	-ffixed-r60 -ffixed-r61 -ffixed-r62 \
+	-fomit-frame-pointer
+
+EXTRA_CFLAGS += -Werror
diff --git a/arch/sh/mm/Makefile_32 b/arch/sh/mm/Makefile_32
deleted file mode 100644
index 986a1e0..0000000
--- a/arch/sh/mm/Makefile_32
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# Makefile for the Linux SuperH-specific parts of the memory manager.
-#
-
-obj-y			:= init.o extable_32.o consistent.o mmap.o
-
-ifndef CONFIG_CACHE_OFF
-cache-$(CONFIG_CPU_SH2)		:= cache-sh2.o
-cache-$(CONFIG_CPU_SH2A)	:= cache-sh2a.o
-cache-$(CONFIG_CPU_SH3)		:= cache-sh3.o
-cache-$(CONFIG_CPU_SH4)		:= cache-sh4.o
-cache-$(CONFIG_SH7705_CACHE_32KB)	+= cache-sh7705.o
-endif
-
-obj-y			+= $(cache-y)
-
-mmu-y			:= tlb-nommu.o pg-nommu.o
-mmu-$(CONFIG_MMU)	:= fault_32.o tlbflush_32.o ioremap_32.o
-
-obj-y			+= $(mmu-y)
-obj-$(CONFIG_DEBUG_FS)	+= asids-debugfs.o
-
-ifdef CONFIG_DEBUG_FS
-obj-$(CONFIG_CPU_SH4)	+= cache-debugfs.o
-endif
-
-ifdef CONFIG_MMU
-tlb-$(CONFIG_CPU_SH3)		:= tlb-sh3.o
-tlb-$(CONFIG_CPU_SH4)		:= tlb-sh4.o
-tlb-$(CONFIG_CPU_HAS_PTEAEX)	:= tlb-pteaex.o
-obj-y				+= $(tlb-y)
-ifndef CONFIG_CACHE_OFF
-obj-$(CONFIG_CPU_SH4)		+= pg-sh4.o
-obj-$(CONFIG_SH7705_CACHE_32KB)	+= pg-sh7705.o
-endif
-endif
-
-obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
-obj-$(CONFIG_PMB)		+= pmb.o
-obj-$(CONFIG_PMB_FIXED)		+= pmb-fixed.o
-obj-$(CONFIG_NUMA)		+= numa.o
-
-EXTRA_CFLAGS += -Werror
diff --git a/arch/sh/mm/Makefile_64 b/arch/sh/mm/Makefile_64
deleted file mode 100644
index 2863ffb..0000000
--- a/arch/sh/mm/Makefile_64
+++ /dev/null
@@ -1,46 +0,0 @@
-#
-# Makefile for the Linux SuperH-specific parts of the memory manager.
-#
-
-obj-y			:= init.o consistent.o mmap.o
-
-mmu-y			:= tlb-nommu.o pg-nommu.o extable_32.o
-mmu-$(CONFIG_MMU)	:= fault_64.o ioremap_64.o tlbflush_64.o tlb-sh5.o \
-			   extable_64.o
-
-ifndef CONFIG_CACHE_OFF
-obj-y			+= cache-sh5.o
-endif
-
-obj-y			+= $(mmu-y)
-obj-$(CONFIG_DEBUG_FS)	+= asids-debugfs.o
-
-obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
-obj-$(CONFIG_NUMA)		+= numa.o
-
-EXTRA_CFLAGS += -Werror
-
-# Special flags for fault_64.o.  This puts restrictions on the number of
-# caller-save registers that the compiler can target when building this file.
-# This is required because the code is called from a context in entry.S where
-# very few registers have been saved in the exception handler (for speed
-# reasons).
-# The caller save registers that have been saved and which can be used are
-# r2,r3,r4,r5 : argument passing
-# r15, r18 : SP and LINK
-# tr0-4 : allow all caller-save TR's.  The compiler seems to be able to make
-#         use of them, so it's probably beneficial to performance to save them
-#         and have them available for it.
-#
-# The resources not listed below are callee save, i.e. the compiler is free to
-# use any of them and will spill them to the stack itself.
-
-CFLAGS_fault_64.o += -ffixed-r7 \
-	-ffixed-r8 -ffixed-r9 -ffixed-r10 -ffixed-r11 -ffixed-r12 \
-	-ffixed-r13 -ffixed-r14 -ffixed-r16 -ffixed-r17 -ffixed-r19 \
-	-ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \
-	-ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \
-	-ffixed-r36 -ffixed-r37 -ffixed-r38 -ffixed-r39 -ffixed-r40 \
-	-ffixed-r41 -ffixed-r42 -ffixed-r43  \
-	-ffixed-r60 -ffixed-r61 -ffixed-r62 \
-	-fomit-frame-pointer
diff --git a/arch/sh/mm/cache-sh2.c b/arch/sh/mm/cache-sh2.c
index c4e80d2..699a71f 100644
--- a/arch/sh/mm/cache-sh2.c
+++ b/arch/sh/mm/cache-sh2.c
@@ -16,7 +16,7 @@
 #include <asm/cacheflush.h>
 #include <asm/io.h>
 
-void __flush_wback_region(void *start, int size)
+static void sh2__flush_wback_region(void *start, int size)
 {
 	unsigned long v;
 	unsigned long begin, end;
@@ -37,7 +37,7 @@
 	}
 }
 
-void __flush_purge_region(void *start, int size)
+static void sh2__flush_purge_region(void *start, int size)
 {
 	unsigned long v;
 	unsigned long begin, end;
@@ -51,7 +51,7 @@
 			  CACHE_OC_ADDRESS_ARRAY | (v & 0x00000ff0) | 0x00000008);
 }
 
-void __flush_invalidate_region(void *start, int size)
+static void sh2__flush_invalidate_region(void *start, int size)
 {
 #ifdef CONFIG_CACHE_WRITEBACK
 	/*
@@ -82,3 +82,10 @@
 			  CACHE_OC_ADDRESS_ARRAY | (v & 0x00000ff0) | 0x00000008);
 #endif
 }
+
+void __init sh2_cache_init(void)
+{
+	__flush_wback_region		= sh2__flush_wback_region;
+	__flush_purge_region		= sh2__flush_purge_region;
+	__flush_invalidate_region	= sh2__flush_invalidate_region;
+}
diff --git a/arch/sh/mm/cache-sh2a.c b/arch/sh/mm/cache-sh2a.c
index 24d86a7..d783361 100644
--- a/arch/sh/mm/cache-sh2a.c
+++ b/arch/sh/mm/cache-sh2a.c
@@ -15,7 +15,7 @@
 #include <asm/cacheflush.h>
 #include <asm/io.h>
 
-void __flush_wback_region(void *start, int size)
+static void sh2a__flush_wback_region(void *start, int size)
 {
 	unsigned long v;
 	unsigned long begin, end;
@@ -44,7 +44,7 @@
 	local_irq_restore(flags);
 }
 
-void __flush_purge_region(void *start, int size)
+static void sh2a__flush_purge_region(void *start, int size)
 {
 	unsigned long v;
 	unsigned long begin, end;
@@ -65,7 +65,7 @@
 	local_irq_restore(flags);
 }
 
-void __flush_invalidate_region(void *start, int size)
+static void sh2a__flush_invalidate_region(void *start, int size)
 {
 	unsigned long v;
 	unsigned long begin, end;
@@ -97,15 +97,15 @@
 }
 
 /* WBack O-Cache and flush I-Cache */
-void flush_icache_range(unsigned long start, unsigned long end)
+static void sh2a_flush_icache_range(void *args)
 {
+	struct flusher_data *data = args;
+	unsigned long start, end;
 	unsigned long v;
-	unsigned long flags;
 
-	start = start & ~(L1_CACHE_BYTES-1);
-	end = (end + L1_CACHE_BYTES-1) & ~(L1_CACHE_BYTES-1);
+	start = data->addr1 & ~(L1_CACHE_BYTES-1);
+	end = (data->addr2 + L1_CACHE_BYTES-1) & ~(L1_CACHE_BYTES-1);
 
-	local_irq_save(flags);
 	jump_to_uncached();
 
 	for (v = start; v < end; v+=L1_CACHE_BYTES) {
@@ -120,10 +120,17 @@
 			}
 		}
 		/* I-Cache invalidate */
-		ctrl_outl(addr,
-			  CACHE_IC_ADDRESS_ARRAY | addr | 0x00000008);
+		ctrl_outl(addr, CACHE_IC_ADDRESS_ARRAY | addr | 0x00000008);
 	}
 
 	back_to_cached();
-	local_irq_restore(flags);
+}
+
+void __init sh2a_cache_init(void)
+{
+	local_flush_icache_range	= sh2a_flush_icache_range;
+
+	__flush_wback_region		= sh2a__flush_wback_region;
+	__flush_purge_region		= sh2a__flush_purge_region;
+	__flush_invalidate_region	= sh2a__flush_invalidate_region;
 }
diff --git a/arch/sh/mm/cache-sh3.c b/arch/sh/mm/cache-sh3.c
index 6d1dbec..faef80c 100644
--- a/arch/sh/mm/cache-sh3.c
+++ b/arch/sh/mm/cache-sh3.c
@@ -32,7 +32,7 @@
  * SIZE: Size of the region.
  */
 
-void __flush_wback_region(void *start, int size)
+static void sh3__flush_wback_region(void *start, int size)
 {
 	unsigned long v, j;
 	unsigned long begin, end;
@@ -71,7 +71,7 @@
  * START: Virtual Address (U0, P1, or P3)
  * SIZE: Size of the region.
  */
-void __flush_purge_region(void *start, int size)
+static void sh3__flush_purge_region(void *start, int size)
 {
 	unsigned long v;
 	unsigned long begin, end;
@@ -90,11 +90,16 @@
 	}
 }
 
-/*
- * No write back please
- *
- * Except I don't think there's any way to avoid the writeback. So we
- * just alias it to __flush_purge_region(). dwmw2.
- */
-void __flush_invalidate_region(void *start, int size)
-	__attribute__((alias("__flush_purge_region")));
+void __init sh3_cache_init(void)
+{
+	__flush_wback_region = sh3__flush_wback_region;
+	__flush_purge_region = sh3__flush_purge_region;
+
+	/*
+	 * No write back please
+	 *
+	 * Except I don't think there's any way to avoid the writeback.
+	 * So we just alias it to sh3__flush_purge_region(). dwmw2.
+	 */
+	__flush_invalidate_region = sh3__flush_purge_region;
+}
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index b36a9c9..70fb906 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -14,6 +14,7 @@
 #include <linux/mm.h>
 #include <linux/io.h>
 #include <linux/mutex.h>
+#include <linux/fs.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
@@ -25,14 +26,6 @@
 #define MAX_DCACHE_PAGES	64	/* XXX: Tune for ways */
 #define MAX_ICACHE_PAGES	32
 
-static void __flush_dcache_segment_writethrough(unsigned long start,
-						unsigned long extent);
-static void __flush_dcache_segment_1way(unsigned long start,
-					unsigned long extent);
-static void __flush_dcache_segment_2way(unsigned long start,
-					unsigned long extent);
-static void __flush_dcache_segment_4way(unsigned long start,
-					unsigned long extent);
 static void __flush_cache_4096(unsigned long addr, unsigned long phys,
 			       unsigned long exec_offset);
 
@@ -44,196 +37,55 @@
 static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long) =
 	(void (*)(unsigned long, unsigned long))0xdeadbeef;
 
-static void compute_alias(struct cache_info *c)
-{
-	c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1);
-	c->n_aliases = c->alias_mask ? (c->alias_mask >> PAGE_SHIFT) + 1 : 0;
-}
-
-static void __init emit_cache_params(void)
-{
-	printk("PVR=%08x CVR=%08x PRR=%08x\n",
-		ctrl_inl(CCN_PVR),
-		ctrl_inl(CCN_CVR),
-		ctrl_inl(CCN_PRR));
-	printk("I-cache : n_ways=%d n_sets=%d way_incr=%d\n",
-		boot_cpu_data.icache.ways,
-		boot_cpu_data.icache.sets,
-		boot_cpu_data.icache.way_incr);
-	printk("I-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
-		boot_cpu_data.icache.entry_mask,
-		boot_cpu_data.icache.alias_mask,
-		boot_cpu_data.icache.n_aliases);
-	printk("D-cache : n_ways=%d n_sets=%d way_incr=%d\n",
-		boot_cpu_data.dcache.ways,
-		boot_cpu_data.dcache.sets,
-		boot_cpu_data.dcache.way_incr);
-	printk("D-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
-		boot_cpu_data.dcache.entry_mask,
-		boot_cpu_data.dcache.alias_mask,
-		boot_cpu_data.dcache.n_aliases);
-
-	/*
-	 * Emit Secondary Cache parameters if the CPU has a probed L2.
-	 */
-	if (boot_cpu_data.flags & CPU_HAS_L2_CACHE) {
-		printk("S-cache : n_ways=%d n_sets=%d way_incr=%d\n",
-			boot_cpu_data.scache.ways,
-			boot_cpu_data.scache.sets,
-			boot_cpu_data.scache.way_incr);
-		printk("S-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
-			boot_cpu_data.scache.entry_mask,
-			boot_cpu_data.scache.alias_mask,
-			boot_cpu_data.scache.n_aliases);
-	}
-
-	if (!__flush_dcache_segment_fn)
-		panic("unknown number of cache ways\n");
-}
-
-/*
- * SH-4 has virtually indexed and physically tagged cache.
- */
-void __init p3_cache_init(void)
-{
-	unsigned int wt_enabled = !!(__raw_readl(CCR) & CCR_CACHE_WT);
-
-	compute_alias(&boot_cpu_data.icache);
-	compute_alias(&boot_cpu_data.dcache);
-	compute_alias(&boot_cpu_data.scache);
-
-	if (wt_enabled) {
-		__flush_dcache_segment_fn = __flush_dcache_segment_writethrough;
-		goto out;
-	}
-
-	switch (boot_cpu_data.dcache.ways) {
-	case 1:
-		__flush_dcache_segment_fn = __flush_dcache_segment_1way;
-		break;
-	case 2:
-		__flush_dcache_segment_fn = __flush_dcache_segment_2way;
-		break;
-	case 4:
-		__flush_dcache_segment_fn = __flush_dcache_segment_4way;
-		break;
-	default:
-		__flush_dcache_segment_fn = NULL;
-		break;
-	}
-
-out:
-	emit_cache_params();
-}
-
-/*
- * Write back the dirty D-caches, but not invalidate them.
- *
- * START: Virtual Address (U0, P1, or P3)
- * SIZE: Size of the region.
- */
-void __flush_wback_region(void *start, int size)
-{
-	unsigned long v;
-	unsigned long begin, end;
-
-	begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
-	end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
-		& ~(L1_CACHE_BYTES-1);
-	for (v = begin; v < end; v+=L1_CACHE_BYTES) {
-		asm volatile("ocbwb	%0"
-			     : /* no output */
-			     : "m" (__m(v)));
-	}
-}
-
-/*
- * Write back the dirty D-caches and invalidate them.
- *
- * START: Virtual Address (U0, P1, or P3)
- * SIZE: Size of the region.
- */
-void __flush_purge_region(void *start, int size)
-{
-	unsigned long v;
-	unsigned long begin, end;
-
-	begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
-	end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
-		& ~(L1_CACHE_BYTES-1);
-	for (v = begin; v < end; v+=L1_CACHE_BYTES) {
-		asm volatile("ocbp	%0"
-			     : /* no output */
-			     : "m" (__m(v)));
-	}
-}
-
-/*
- * No write back please
- */
-void __flush_invalidate_region(void *start, int size)
-{
-	unsigned long v;
-	unsigned long begin, end;
-
-	begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
-	end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
-		& ~(L1_CACHE_BYTES-1);
-	for (v = begin; v < end; v+=L1_CACHE_BYTES) {
-		asm volatile("ocbi	%0"
-			     : /* no output */
-			     : "m" (__m(v)));
-	}
-}
-
 /*
  * Write back the range of D-cache, and purge the I-cache.
  *
  * Called from kernel/module.c:sys_init_module and routine for a.out format,
  * signal handler code and kprobes code
  */
-void flush_icache_range(unsigned long start, unsigned long end)
+static void sh4_flush_icache_range(void *args)
 {
+	struct flusher_data *data = args;
 	int icacheaddr;
-	unsigned long flags, v;
+	unsigned long start, end;
+	unsigned long v;
 	int i;
 
-       /* If there are too many pages then just blow the caches */
-        if (((end - start) >> PAGE_SHIFT) >= MAX_ICACHE_PAGES) {
-                flush_cache_all();
-       } else {
-               /* selectively flush d-cache then invalidate the i-cache */
-               /* this is inefficient, so only use for small ranges */
-               start &= ~(L1_CACHE_BYTES-1);
-               end += L1_CACHE_BYTES-1;
-               end &= ~(L1_CACHE_BYTES-1);
+	start = data->addr1;
+	end = data->addr2;
 
-               local_irq_save(flags);
-               jump_to_uncached();
+	/* If there are too many pages then just blow the caches */
+	if (((end - start) >> PAGE_SHIFT) >= MAX_ICACHE_PAGES) {
+		local_flush_cache_all(args);
+	} else {
+		/* selectively flush d-cache then invalidate the i-cache */
+		/* this is inefficient, so only use for small ranges */
+		start &= ~(L1_CACHE_BYTES-1);
+		end += L1_CACHE_BYTES-1;
+		end &= ~(L1_CACHE_BYTES-1);
 
-               for (v = start; v < end; v+=L1_CACHE_BYTES) {
-                       asm volatile("ocbwb     %0"
-                                    : /* no output */
-                                    : "m" (__m(v)));
+		jump_to_uncached();
 
-                       icacheaddr = CACHE_IC_ADDRESS_ARRAY | (
-                                       v & cpu_data->icache.entry_mask);
+		for (v = start; v < end; v+=L1_CACHE_BYTES) {
+			__ocbwb(v);
 
-                       for (i = 0; i < cpu_data->icache.ways;
-                               i++, icacheaddr += cpu_data->icache.way_incr)
-                                       /* Clear i-cache line valid-bit */
-                                       ctrl_outl(0, icacheaddr);
-               }
+			icacheaddr = CACHE_IC_ADDRESS_ARRAY |
+				(v & cpu_data->icache.entry_mask);
+
+			for (i = 0; i < cpu_data->icache.ways;
+				i++, icacheaddr += cpu_data->icache.way_incr)
+				/* Clear i-cache line valid-bit */
+				ctrl_outl(0, icacheaddr);
+		}
 
 		back_to_cached();
-		local_irq_restore(flags);
 	}
 }
 
 static inline void flush_cache_4096(unsigned long start,
 				    unsigned long phys)
 {
-	unsigned long flags, exec_offset = 0;
+	unsigned long exec_offset = 0;
 
 	/*
 	 * All types of SH-4 require PC to be in P2 to operate on the I-cache.
@@ -243,19 +95,25 @@
 	    (start < CACHE_OC_ADDRESS_ARRAY))
 		exec_offset = 0x20000000;
 
-	local_irq_save(flags);
 	__flush_cache_4096(start | SH_CACHE_ASSOC,
 			   P1SEGADDR(phys), exec_offset);
-	local_irq_restore(flags);
 }
 
 /*
  * Write back & invalidate the D-cache of the page.
  * (To avoid "alias" issues)
  */
-void flush_dcache_page(struct page *page)
+static void sh4_flush_dcache_page(void *arg)
 {
-	if (test_bit(PG_mapped, &page->flags)) {
+	struct page *page = arg;
+#ifndef CONFIG_SMP
+	struct address_space *mapping = page_mapping(page);
+
+	if (mapping && !mapping_mapped(mapping))
+		set_bit(PG_dcache_dirty, &page->flags);
+	else
+#endif
+	{
 		unsigned long phys = PHYSADDR(page_address(page));
 		unsigned long addr = CACHE_OC_ADDRESS_ARRAY;
 		int i, n;
@@ -272,9 +130,8 @@
 /* TODO: Selective icache invalidation through IC address array.. */
 static void __uses_jump_to_uncached flush_icache_all(void)
 {
-	unsigned long flags, ccr;
+	unsigned long ccr;
 
-	local_irq_save(flags);
 	jump_to_uncached();
 
 	/* Flush I-cache */
@@ -286,18 +143,16 @@
 	 * back_to_cached() will take care of the barrier for us, don't add
 	 * another one!
 	 */
-
 	back_to_cached();
-	local_irq_restore(flags);
 }
 
-void flush_dcache_all(void)
+static inline void flush_dcache_all(void)
 {
 	(*__flush_dcache_segment_fn)(0UL, boot_cpu_data.dcache.way_size);
 	wmb();
 }
 
-void flush_cache_all(void)
+static void sh4_flush_cache_all(void *unused)
 {
 	flush_dcache_all();
 	flush_icache_all();
@@ -389,8 +244,13 @@
  *
  * Caller takes mm->mmap_sem.
  */
-void flush_cache_mm(struct mm_struct *mm)
+static void sh4_flush_cache_mm(void *arg)
 {
+	struct mm_struct *mm = arg;
+
+	if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT)
+		return;
+
 	/*
 	 * If cache is only 4k-per-way, there are never any 'aliases'.  Since
 	 * the cache is physically tagged, the data can just be left in there.
@@ -426,12 +286,21 @@
  * ADDR: Virtual Address (U0 address)
  * PFN: Physical page number
  */
-void flush_cache_page(struct vm_area_struct *vma, unsigned long address,
-		      unsigned long pfn)
+static void sh4_flush_cache_page(void *args)
 {
-	unsigned long phys = pfn << PAGE_SHIFT;
+	struct flusher_data *data = args;
+	struct vm_area_struct *vma;
+	unsigned long address, pfn, phys;
 	unsigned int alias_mask;
 
+	vma = data->vma;
+	address = data->addr1;
+	pfn = data->addr2;
+	phys = pfn << PAGE_SHIFT;
+
+	if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT)
+		return;
+
 	alias_mask = boot_cpu_data.dcache.alias_mask;
 
 	/* We only need to flush D-cache when we have alias */
@@ -471,9 +340,19 @@
  * Flushing the cache lines for U0 only isn't enough.
  * We need to flush for P1 too, which may contain aliases.
  */
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
-		       unsigned long end)
+static void sh4_flush_cache_range(void *args)
 {
+	struct flusher_data *data = args;
+	struct vm_area_struct *vma;
+	unsigned long start, end;
+
+	vma = data->vma;
+	start = data->addr1;
+	end = data->addr2;
+
+	if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT)
+		return;
+
 	/*
 	 * If cache is only 4k-per-way, there are never any 'aliases'.  Since
 	 * the cache is physically tagged, the data can just be left in there.
@@ -501,20 +380,6 @@
 	}
 }
 
-/*
- * flush_icache_user_range
- * @vma: VMA of the process
- * @page: page
- * @addr: U0 address
- * @len: length of the range (< page size)
- */
-void flush_icache_user_range(struct vm_area_struct *vma,
-			     struct page *page, unsigned long addr, int len)
-{
-	flush_cache_page(vma, addr, page_to_pfn(page));
-	mb();
-}
-
 /**
  * __flush_cache_4096
  *
@@ -824,3 +689,47 @@
 		a3 += linesz;
 	} while (a0 < a0e);
 }
+
+extern void __weak sh4__flush_region_init(void);
+
+/*
+ * SH-4 has a virtually indexed and physically tagged cache.
+ */
+void __init sh4_cache_init(void)
+{
+	unsigned int wt_enabled = !!(__raw_readl(CCR) & CCR_CACHE_WT);
+
+	printk("PVR=%08x CVR=%08x PRR=%08x\n",
+		ctrl_inl(CCN_PVR),
+		ctrl_inl(CCN_CVR),
+		ctrl_inl(CCN_PRR));
+
+	if (wt_enabled)
+		__flush_dcache_segment_fn = __flush_dcache_segment_writethrough;
+	else {
+		switch (boot_cpu_data.dcache.ways) {
+		case 1:
+			__flush_dcache_segment_fn = __flush_dcache_segment_1way;
+			break;
+		case 2:
+			__flush_dcache_segment_fn = __flush_dcache_segment_2way;
+			break;
+		case 4:
+			__flush_dcache_segment_fn = __flush_dcache_segment_4way;
+			break;
+		default:
+			panic("unknown number of cache ways\n");
+			break;
+		}
+	}
+
+	local_flush_icache_range	= sh4_flush_icache_range;
+	local_flush_dcache_page		= sh4_flush_dcache_page;
+	local_flush_cache_all		= sh4_flush_cache_all;
+	local_flush_cache_mm		= sh4_flush_cache_mm;
+	local_flush_cache_dup_mm	= sh4_flush_cache_mm;
+	local_flush_cache_page		= sh4_flush_cache_page;
+	local_flush_cache_range		= sh4_flush_cache_range;
+
+	sh4__flush_region_init();
+}
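
As a side note on the calling convention this conversion introduces (a sketch, not part of the patch): every local_flush_* hook now takes a single void * argument, and the range-style operations receive a struct flusher_data whose addr1/addr2 members carry the start and end addresses. A hypothetical handler following the same pattern as sh4_flush_icache_range() would unpack it along these lines:

	/* Illustrative only -- not part of this series. */
	static void example_flush_icache_range(void *args)
	{
		struct flusher_data *data = args;
		unsigned long v;

		/* Write back the D-cache side of the range, line by line. */
		for (v = data->addr1 & ~(L1_CACHE_BYTES - 1);
		     v < data->addr2; v += L1_CACHE_BYTES)
			__ocbwb(v);
	}

A board's cache_init() would then publish it with local_flush_icache_range = example_flush_icache_range, exactly as sh4_cache_init() does above.
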
diff --git a/arch/sh/mm/cache-sh5.c b/arch/sh/mm/cache-sh5.c
index 8676209..2f9dd6d 100644
--- a/arch/sh/mm/cache-sh5.c
+++ b/arch/sh/mm/cache-sh5.c
@@ -20,23 +20,11 @@
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
 
+extern void __weak sh4__flush_region_init(void);
+
 /* Wired TLB entry for the D-cache */
 static unsigned long long dtlb_cache_slot;
 
-void __init p3_cache_init(void)
-{
-	/* Reserve a slot for dcache colouring in the DTLB */
-	dtlb_cache_slot	= sh64_get_wired_dtlb_entry();
-}
-
-#ifdef CONFIG_DCACHE_DISABLED
-#define sh64_dcache_purge_all()					do { } while (0)
-#define sh64_dcache_purge_coloured_phy_page(paddr, eaddr)	do { } while (0)
-#define sh64_dcache_purge_user_range(mm, start, end)		do { } while (0)
-#define sh64_dcache_purge_phy_page(paddr)			do { } while (0)
-#define sh64_dcache_purge_virt_page(mm, eaddr)			do { } while (0)
-#endif
-
 /*
  * The following group of functions deal with mapping and unmapping a
  * temporary page into a DTLB slot that has been set aside for exclusive
@@ -46,29 +34,22 @@
 sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid,
 			   unsigned long paddr)
 {
-	local_irq_disable();
 	sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
 }
 
 static inline void sh64_teardown_dtlb_cache_slot(void)
 {
 	sh64_teardown_tlb_slot(dtlb_cache_slot);
-	local_irq_enable();
 }
 
-#ifndef CONFIG_ICACHE_DISABLED
 static inline void sh64_icache_inv_all(void)
 {
 	unsigned long long addr, flag, data;
-	unsigned long flags;
 
 	addr = ICCR0;
 	flag = ICCR0_ICI;
 	data = 0;
 
-	/* Make this a critical section for safety (probably not strictly necessary.) */
-	local_irq_save(flags);
-
 	/* Without %1 it gets inexplicably wrong */
 	__asm__ __volatile__ (
 		"getcfg	%3, 0, %0\n\t"
@@ -77,8 +58,6 @@
 		"synci"
 		: "=&r" (data)
 		: "0" (data), "r" (flag), "r" (addr));
-
-	local_irq_restore(flags);
 }
 
 static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
@@ -103,7 +82,6 @@
 	   Also, eaddr is page-aligned. */
 	unsigned int cpu = smp_processor_id();
 	unsigned long long addr, end_addr;
-	unsigned long flags = 0;
 	unsigned long running_asid, vma_asid;
 	addr = eaddr;
 	end_addr = addr + PAGE_SIZE;
@@ -124,10 +102,9 @@
 
 	running_asid = get_asid();
 	vma_asid = cpu_asid(cpu, vma->vm_mm);
-	if (running_asid != vma_asid) {
-		local_irq_save(flags);
+	if (running_asid != vma_asid)
 		switch_and_save_asid(vma_asid);
-	}
+
 	while (addr < end_addr) {
 		/* Worth unrolling a little */
 		__asm__ __volatile__("icbi %0,  0" : : "r" (addr));
@@ -136,10 +113,9 @@
 		__asm__ __volatile__("icbi %0, 96" : : "r" (addr));
 		addr += 128;
 	}
-	if (running_asid != vma_asid) {
+
+	if (running_asid != vma_asid)
 		switch_and_save_asid(running_asid);
-		local_irq_restore(flags);
-	}
 }
 
 static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
@@ -172,16 +148,12 @@
 		unsigned long eaddr;
 		unsigned long after_last_page_start;
 		unsigned long mm_asid, current_asid;
-		unsigned long flags = 0;
 
 		mm_asid = cpu_asid(smp_processor_id(), mm);
 		current_asid = get_asid();
 
-		if (mm_asid != current_asid) {
-			/* Switch ASID and run the invalidate loop under cli */
-			local_irq_save(flags);
+		if (mm_asid != current_asid)
 			switch_and_save_asid(mm_asid);
-		}
 
 		aligned_start = start & PAGE_MASK;
 		after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);
@@ -207,59 +179,11 @@
 			aligned_start = vma->vm_end; /* Skip to start of next region */
 		}
 
-		if (mm_asid != current_asid) {
+		if (mm_asid != current_asid)
 			switch_and_save_asid(current_asid);
-			local_irq_restore(flags);
-		}
 	}
 }
 
-/*
- * Invalidate a small range of user context I-cache, not necessarily page
- * (or even cache-line) aligned.
- *
- * Since this is used inside ptrace, the ASID in the mm context typically
- * won't match current_asid.  We'll have to switch ASID to do this.  For
- * safety, and given that the range will be small, do all this under cli.
- *
- * Note, there is a hazard that the ASID in mm->context is no longer
- * actually associated with mm, i.e. if the mm->context has started a new
- * cycle since mm was last active.  However, this is just a performance
- * issue: all that happens is that we invalidate lines belonging to
- * another mm, so the owning process has to refill them when that mm goes
- * live again.  mm itself can't have any cache entries because there will
- * have been a flush_cache_all when the new mm->context cycle started.
- */
-static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
-						unsigned long start, int len)
-{
-	unsigned long long eaddr = start;
-	unsigned long long eaddr_end = start + len;
-	unsigned long current_asid, mm_asid;
-	unsigned long flags;
-	unsigned long long epage_start;
-
-	/*
-	 * Align to start of cache line.  Otherwise, suppose len==8 and
-	 * start was at 32N+28 : the last 4 bytes wouldn't get invalidated.
-	 */
-	eaddr = L1_CACHE_ALIGN(start);
-	eaddr_end = start + len;
-
-	mm_asid = cpu_asid(smp_processor_id(), mm);
-	local_irq_save(flags);
-	current_asid = switch_and_save_asid(mm_asid);
-
-	epage_start = eaddr & PAGE_MASK;
-
-	while (eaddr < eaddr_end) {
-		__asm__ __volatile__("icbi %0, 0" : : "r" (eaddr));
-		eaddr += L1_CACHE_BYTES;
-	}
-	switch_and_save_asid(current_asid);
-	local_irq_restore(flags);
-}
-
 static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
 {
 	/* The icbi instruction never raises ITLBMISS.  i.e. if there's not a
@@ -287,9 +211,7 @@
 		addr += L1_CACHE_BYTES;
 	}
 }
-#endif /* !CONFIG_ICACHE_DISABLED */
 
-#ifndef CONFIG_DCACHE_DISABLED
 /* Buffer used as the target of alloco instructions to purge data from cache
    sets by natural eviction. -- RPC */
 #define DUMMY_ALLOCO_AREA_SIZE ((L1_CACHE_BYTES << 10) + (1024 * 4))
@@ -541,59 +463,10 @@
 }
 
 /*
- * Purge the range of addresses from the D-cache.
- *
- * The addresses lie in the superpage mapping. There's no harm if we
- * overpurge at either end - just a small performance loss.
- */
-void __flush_purge_region(void *start, int size)
-{
-	unsigned long long ullend, addr, aligned_start;
-
-	aligned_start = (unsigned long long)(signed long long)(signed long) start;
-	addr = L1_CACHE_ALIGN(aligned_start);
-	ullend = (unsigned long long) (signed long long) (signed long) start + size;
-
-	while (addr <= ullend) {
-		__asm__ __volatile__ ("ocbp %0, 0" : : "r" (addr));
-		addr += L1_CACHE_BYTES;
-	}
-}
-
-void __flush_wback_region(void *start, int size)
-{
-	unsigned long long ullend, addr, aligned_start;
-
-	aligned_start = (unsigned long long)(signed long long)(signed long) start;
-	addr = L1_CACHE_ALIGN(aligned_start);
-	ullend = (unsigned long long) (signed long long) (signed long) start + size;
-
-	while (addr < ullend) {
-		__asm__ __volatile__ ("ocbwb %0, 0" : : "r" (addr));
-		addr += L1_CACHE_BYTES;
-	}
-}
-
-void __flush_invalidate_region(void *start, int size)
-{
-	unsigned long long ullend, addr, aligned_start;
-
-	aligned_start = (unsigned long long)(signed long long)(signed long) start;
-	addr = L1_CACHE_ALIGN(aligned_start);
-	ullend = (unsigned long long) (signed long long) (signed long) start + size;
-
-	while (addr < ullend) {
-		__asm__ __volatile__ ("ocbi %0, 0" : : "r" (addr));
-		addr += L1_CACHE_BYTES;
-	}
-}
-#endif /* !CONFIG_DCACHE_DISABLED */
-
-/*
  * Invalidate the entire contents of both caches, after writing back to
  * memory any dirty data from the D-cache.
  */
-void flush_cache_all(void)
+static void sh5_flush_cache_all(void *unused)
 {
 	sh64_dcache_purge_all();
 	sh64_icache_inv_all();
@@ -620,7 +493,7 @@
  * I-cache.  This is similar to the lack of action needed in
  * flush_tlb_mm - see fault.c.
  */
-void flush_cache_mm(struct mm_struct *mm)
+static void sh5_flush_cache_mm(void *unused)
 {
 	sh64_dcache_purge_all();
 }
@@ -632,13 +505,18 @@
  *
  * Note, 'end' is 1 byte beyond the end of the range to flush.
  */
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
-		       unsigned long end)
+static void sh5_flush_cache_range(void *args)
 {
-	struct mm_struct *mm = vma->vm_mm;
+	struct flusher_data *data = args;
+	struct vm_area_struct *vma;
+	unsigned long start, end;
 
-	sh64_dcache_purge_user_range(mm, start, end);
-	sh64_icache_inv_user_page_range(mm, start, end);
+	vma = data->vma;
+	start = data->addr1;
+	end = data->addr2;
+
+	sh64_dcache_purge_user_range(vma->vm_mm, start, end);
+	sh64_icache_inv_user_page_range(vma->vm_mm, start, end);
 }
 
 /*
@@ -650,16 +528,23 @@
  *
  * Note, this is called with pte lock held.
  */
-void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr,
-		      unsigned long pfn)
+static void sh5_flush_cache_page(void *args)
 {
+	struct flusher_data *data = args;
+	struct vm_area_struct *vma;
+	unsigned long eaddr, pfn;
+
+	vma = data->vma;
+	eaddr = data->addr1;
+	pfn = data->addr2;
+
 	sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);
 
 	if (vma->vm_flags & VM_EXEC)
 		sh64_icache_inv_user_page(vma, eaddr);
 }
 
-void flush_dcache_page(struct page *page)
+static void sh5_flush_dcache_page(void *page)
 {
 	sh64_dcache_purge_phy_page(page_to_phys(page));
 	wmb();
@@ -673,162 +558,47 @@
 * mapping, therefore it's guaranteed that there are no cache entries for
  * the range in cache sets of the wrong colour.
  */
-void flush_icache_range(unsigned long start, unsigned long end)
+static void sh5_flush_icache_range(void *args)
 {
+	struct flusher_data *data = args;
+	unsigned long start, end;
+
+	start = data->addr1;
+	end = data->addr2;
+
 	__flush_purge_region((void *)start, end);
 	wmb();
 	sh64_icache_inv_kernel_range(start, end);
 }
 
 /*
- * Flush the range of user (defined by vma->vm_mm) address space starting
- * at 'addr' for 'len' bytes from the cache.  The range does not straddle
- * a page boundary, the unique physical page containing the range is
- * 'page'.  This seems to be used mainly for invalidating an address
- * range following a poke into the program text through the ptrace() call
- * from another process (e.g. for BRK instruction insertion).
- */
-void flush_icache_user_range(struct vm_area_struct *vma,
-			struct page *page, unsigned long addr, int len)
-{
-
-	sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
-	mb();
-
-	if (vma->vm_flags & VM_EXEC)
-		sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
-}
-
-/*
  * For the address range [start,end), write back the data from the
  * D-cache and invalidate the corresponding region of the I-cache for the
  * current process.  Used to flush signal trampolines on the stack to
  * make them executable.
  */
-void flush_cache_sigtramp(unsigned long vaddr)
+static void sh5_flush_cache_sigtramp(void *vaddr)
 {
-	unsigned long end = vaddr + L1_CACHE_BYTES;
+	unsigned long end = (unsigned long)vaddr + L1_CACHE_BYTES;
 
-	__flush_wback_region((void *)vaddr, L1_CACHE_BYTES);
+	__flush_wback_region(vaddr, L1_CACHE_BYTES);
 	wmb();
-	sh64_icache_inv_current_user_range(vaddr, end);
+	sh64_icache_inv_current_user_range((unsigned long)vaddr, end);
 }
 
-#ifdef CONFIG_MMU
-/*
- * These *MUST* lie in an area of virtual address space that's otherwise
- * unused.
- */
-#define UNIQUE_EADDR_START 0xe0000000UL
-#define UNIQUE_EADDR_END   0xe8000000UL
-
-/*
- * Given a physical address paddr, and a user virtual address user_eaddr
- * which will eventually be mapped to it, create a one-off kernel-private
- * eaddr mapped to the same paddr.  This is used for creating special
- * destination pages for copy_user_page and clear_user_page.
- */
-static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr,
-					    unsigned long paddr)
+void __init sh5_cache_init(void)
 {
-	static unsigned long current_pointer = UNIQUE_EADDR_START;
-	unsigned long coloured_pointer;
+	local_flush_cache_all		= sh5_flush_cache_all;
+	local_flush_cache_mm		= sh5_flush_cache_mm;
+	local_flush_cache_dup_mm	= sh5_flush_cache_mm;
+	local_flush_cache_page		= sh5_flush_cache_page;
+	local_flush_cache_range		= sh5_flush_cache_range;
+	local_flush_dcache_page		= sh5_flush_dcache_page;
+	local_flush_icache_range	= sh5_flush_icache_range;
+	local_flush_cache_sigtramp	= sh5_flush_cache_sigtramp;
 
-	if (current_pointer == UNIQUE_EADDR_END) {
-		sh64_dcache_purge_all();
-		current_pointer = UNIQUE_EADDR_START;
-	}
+	/* Reserve a slot for dcache colouring in the DTLB */
+	dtlb_cache_slot	= sh64_get_wired_dtlb_entry();
 
-	coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) |
-				(user_eaddr & CACHE_OC_SYN_MASK);
-	sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);
-
-	current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);
-
-	return coloured_pointer;
+	sh4__flush_region_init();
 }
-
-static void sh64_copy_user_page_coloured(void *to, void *from,
-					 unsigned long address)
-{
-	void *coloured_to;
-
-	/*
-	 * Discard any existing cache entries of the wrong colour.  These are
-	 * present quite often, if the kernel has recently used the page
-	 * internally, then given it up, then it's been allocated to the user.
-	 */
-	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long)to);
-
-	coloured_to = (void *)sh64_make_unique_eaddr(address, __pa(to));
-	copy_page(from, coloured_to);
-
-	sh64_teardown_dtlb_cache_slot();
-}
-
-static void sh64_clear_user_page_coloured(void *to, unsigned long address)
-{
-	void *coloured_to;
-
-	/*
-	 * Discard any existing kernel-originated lines of the wrong
-	 * colour (as above)
-	 */
-	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long)to);
-
-	coloured_to = (void *)sh64_make_unique_eaddr(address, __pa(to));
-	clear_page(coloured_to);
-
-	sh64_teardown_dtlb_cache_slot();
-}
-
-/*
- * 'from' and 'to' are kernel virtual addresses (within the superpage
- * mapping of the physical RAM).  'address' is the user virtual address
- * where the copy 'to' will be mapped after.  This allows a custom
- * mapping to be used to ensure that the new copy is placed in the
- * right cache sets for the user to see it without having to bounce it
- * out via memory.  Note however : the call to flush_page_to_ram in
- * (generic)/mm/memory.c:(break_cow) undoes all this good work in that one
- * very important case!
- *
- * TBD : can we guarantee that on every call, any cache entries for
- * 'from' are in the same colour sets as 'address' also?  i.e. is this
- * always used just to deal with COW?  (I suspect not).
- *
- * There are two possibilities here for when the page 'from' was last accessed:
- * - by the kernel : this is OK, no purge required.
- * - by the/a user (e.g. for break_COW) : need to purge.
- *
- * If the potential user mapping at 'address' is the same colour as
- * 'from' there is no need to purge any cache lines from the 'from'
- * page mapped into cache sets of colour 'address'.  (The copy will be
- * accessing the page through 'from').
- */
-void copy_user_page(void *to, void *from, unsigned long address,
-		    struct page *page)
-{
-	if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0)
-		sh64_dcache_purge_coloured_phy_page(__pa(from), address);
-
-	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0)
-		copy_page(to, from);
-	else
-		sh64_copy_user_page_coloured(to, from, address);
-}
-
-/*
- * 'to' is a kernel virtual address (within the superpage mapping of the
- * physical RAM).  'address' is the user virtual address where the 'to'
- * page will be mapped after.  This allows a custom mapping to be used to
- * ensure that the new copy is placed in the right cache sets for the
- * user to see it without having to bounce it out via memory.
- */
-void clear_user_page(void *to, unsigned long address, struct page *page)
-{
-	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0)
-		clear_page(to);
-	else
-		sh64_clear_user_page_coloured(to, address);
-}
-#endif
diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c
index 22dacc7..9dc3866 100644
--- a/arch/sh/mm/cache-sh7705.c
+++ b/arch/sh/mm/cache-sh7705.c
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
+#include <linux/fs.h>
 #include <linux/threads.h>
 #include <asm/addrspace.h>
 #include <asm/page.h>
@@ -63,18 +64,23 @@
  *
  * Called from kernel/module.c:sys_init_module and routine for a.out format.
  */
-void flush_icache_range(unsigned long start, unsigned long end)
+static void sh7705_flush_icache_range(void *args)
 {
+	struct flusher_data *data = args;
+	unsigned long start, end;
+
+	start = data->addr1;
+	end = data->addr2;
+
 	__flush_wback_region((void *)start, end - start);
 }
 
 /*
  * Writeback&Invalidate the D-cache of the page
  */
-static void __uses_jump_to_uncached __flush_dcache_page(unsigned long phys)
+static void __flush_dcache_page(unsigned long phys)
 {
 	unsigned long ways, waysize, addrstart;
-	unsigned long flags;
 
 	phys |= SH_CACHE_VALID;
 
@@ -91,7 +97,6 @@
 	 * potential cache aliasing, therefore the optimisation is probably not
 	 * possible.
 	 */
-	local_irq_save(flags);
 	jump_to_uncached();
 
 	ways = current_cpu_data.dcache.ways;
@@ -119,59 +124,27 @@
 	} while (--ways);
 
 	back_to_cached();
-	local_irq_restore(flags);
 }
 
 /*
  * Write back & invalidate the D-cache of the page.
  * (To avoid "alias" issues)
  */
-void flush_dcache_page(struct page *page)
+static void sh7705_flush_dcache_page(void *page)
 {
-	if (test_bit(PG_mapped, &page->flags))
+	struct address_space *mapping = page_mapping(page);
+
+	if (mapping && !mapping_mapped(mapping))
+		set_bit(PG_dcache_dirty, &page->flags);
+	else
 		__flush_dcache_page(PHYSADDR(page_address(page)));
 }
 
-void __uses_jump_to_uncached flush_cache_all(void)
+static void sh7705_flush_cache_all(void *args)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
 	jump_to_uncached();
-
 	cache_wback_all();
 	back_to_cached();
-	local_irq_restore(flags);
-}
-
-void flush_cache_mm(struct mm_struct *mm)
-{
-	/* Is there any good way? */
-	/* XXX: possibly call flush_cache_range for each vm area */
-	flush_cache_all();
-}
-
-/*
- * Write back and invalidate D-caches.
- *
- * START, END: Virtual Address (U0 address)
- *
- * NOTE: We need to flush the _physical_ page entry.
- * Flushing the cache lines for U0 only isn't enough.
- * We need to flush for P1 too, which may contain aliases.
- */
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
-		       unsigned long end)
-{
-
-	/*
-	 * We could call flush_cache_page for the pages of these range,
-	 * but it's not efficient (scan the caches all the time...).
-	 *
-	 * We can't use A-bit magic, as there's the case we don't have
-	 * valid entry on TLB.
-	 */
-	flush_cache_all();
 }
 
 /*
@@ -179,9 +152,11 @@
  *
  * ADDRESS: Virtual Address (U0 address)
  */
-void flush_cache_page(struct vm_area_struct *vma, unsigned long address,
-		      unsigned long pfn)
+static void sh7705_flush_cache_page(void *args)
 {
+	struct flusher_data *data = args;
+	unsigned long pfn = data->addr2;
+
 	__flush_dcache_page(pfn << PAGE_SHIFT);
 }
 
@@ -193,7 +168,19 @@
  * Not entirely sure why this is necessary on SH3 with 32K cache but
  * without it we get occasional "Memory fault" when loading a program.
  */
-void flush_icache_page(struct vm_area_struct *vma, struct page *page)
+static void sh7705_flush_icache_page(void *page)
 {
 	__flush_purge_region(page_address(page), PAGE_SIZE);
 }
+
+void __init sh7705_cache_init(void)
+{
+	local_flush_icache_range	= sh7705_flush_icache_range;
+	local_flush_dcache_page		= sh7705_flush_dcache_page;
+	local_flush_cache_all		= sh7705_flush_cache_all;
+	local_flush_cache_mm		= sh7705_flush_cache_all;
+	local_flush_cache_dup_mm	= sh7705_flush_cache_all;
+	local_flush_cache_range		= sh7705_flush_cache_all;
+	local_flush_cache_page		= sh7705_flush_cache_page;
+	local_flush_icache_page		= sh7705_flush_icache_page;
+}
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
new file mode 100644
index 0000000..411fe60
--- /dev/null
+++ b/arch/sh/mm/cache.c
@@ -0,0 +1,306 @@
+/*
+ * arch/sh/mm/cache.c
+ *
+ * Copyright (C) 1999, 2000, 2002  Niibe Yutaka
+ * Copyright (C) 2002 - 2009  Paul Mundt
+ *
+ * Released under the terms of the GNU GPL v2.0.
+ */
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+void (*local_flush_cache_all)(void *args) = cache_noop;
+void (*local_flush_cache_mm)(void *args) = cache_noop;
+void (*local_flush_cache_dup_mm)(void *args) = cache_noop;
+void (*local_flush_cache_page)(void *args) = cache_noop;
+void (*local_flush_cache_range)(void *args) = cache_noop;
+void (*local_flush_dcache_page)(void *args) = cache_noop;
+void (*local_flush_icache_range)(void *args) = cache_noop;
+void (*local_flush_icache_page)(void *args) = cache_noop;
+void (*local_flush_cache_sigtramp)(void *args) = cache_noop;
+
+void (*__flush_wback_region)(void *start, int size);
+void (*__flush_purge_region)(void *start, int size);
+void (*__flush_invalidate_region)(void *start, int size);
+
+static inline void noop__flush_region(void *start, int size)
+{
+}
+
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+		       unsigned long vaddr, void *dst, const void *src,
+		       unsigned long len)
+{
+	if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
+	    !test_bit(PG_dcache_dirty, &page->flags)) {
+		void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
+		memcpy(vto, src, len);
+		kunmap_coherent();
+	} else {
+		memcpy(dst, src, len);
+		if (boot_cpu_data.dcache.n_aliases)
+			set_bit(PG_dcache_dirty, &page->flags);
+	}
+
+	if (vma->vm_flags & VM_EXEC)
+		flush_cache_page(vma, vaddr, page_to_pfn(page));
+}
+
+void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
+			 unsigned long vaddr, void *dst, const void *src,
+			 unsigned long len)
+{
+	if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
+	    !test_bit(PG_dcache_dirty, &page->flags)) {
+		void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
+		memcpy(dst, vfrom, len);
+		kunmap_coherent();
+	} else {
+		memcpy(dst, src, len);
+		if (boot_cpu_data.dcache.n_aliases)
+			set_bit(PG_dcache_dirty, &page->flags);
+	}
+}
+
+void copy_user_highpage(struct page *to, struct page *from,
+			unsigned long vaddr, struct vm_area_struct *vma)
+{
+	void *vfrom, *vto;
+
+	vto = kmap_atomic(to, KM_USER1);
+
+	if (boot_cpu_data.dcache.n_aliases && page_mapped(from) &&
+	    !test_bit(PG_dcache_dirty, &from->flags)) {
+		vfrom = kmap_coherent(from, vaddr);
+		copy_page(vto, vfrom);
+		kunmap_coherent();
+	} else {
+		vfrom = kmap_atomic(from, KM_USER0);
+		copy_page(vto, vfrom);
+		kunmap_atomic(vfrom, KM_USER0);
+	}
+
+	if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK))
+		__flush_wback_region(vto, PAGE_SIZE);
+
+	kunmap_atomic(vto, KM_USER1);
+	/* Make sure this page is visible to other CPUs too before using it */
+	smp_wmb();
+}
+EXPORT_SYMBOL(copy_user_highpage);
+
+void clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+	void *kaddr = kmap_atomic(page, KM_USER0);
+
+	clear_page(kaddr);
+
+	if (pages_do_alias((unsigned long)kaddr, vaddr & PAGE_MASK))
+		__flush_wback_region(kaddr, PAGE_SIZE);
+
+	kunmap_atomic(kaddr, KM_USER0);
+}
+EXPORT_SYMBOL(clear_user_highpage);
+
+void __update_cache(struct vm_area_struct *vma,
+		    unsigned long address, pte_t pte)
+{
+	struct page *page;
+	unsigned long pfn = pte_pfn(pte);
+
+	if (!boot_cpu_data.dcache.n_aliases)
+		return;
+
+	page = pfn_to_page(pfn);
+	if (pfn_valid(pfn) && page_mapping(page)) {
+		int dirty = test_and_clear_bit(PG_dcache_dirty, &page->flags);
+		if (dirty) {
+			unsigned long addr = (unsigned long)page_address(page);
+
+			if (pages_do_alias(addr, address & PAGE_MASK))
+				__flush_wback_region((void *)addr, PAGE_SIZE);
+		}
+	}
+}
+
+void __flush_anon_page(struct page *page, unsigned long vmaddr)
+{
+	unsigned long addr = (unsigned long) page_address(page);
+
+	if (pages_do_alias(addr, vmaddr)) {
+		if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
+		    !test_bit(PG_dcache_dirty, &page->flags)) {
+			void *kaddr;
+
+			kaddr = kmap_coherent(page, vmaddr);
+			__flush_wback_region((void *)kaddr, PAGE_SIZE);
+			kunmap_coherent();
+		} else
+			__flush_wback_region((void *)addr, PAGE_SIZE);
+	}
+}
+
+void flush_cache_all(void)
+{
+	on_each_cpu(local_flush_cache_all, NULL, 1);
+}
+
+void flush_cache_mm(struct mm_struct *mm)
+{
+	on_each_cpu(local_flush_cache_mm, mm, 1);
+}
+
+void flush_cache_dup_mm(struct mm_struct *mm)
+{
+	on_each_cpu(local_flush_cache_dup_mm, mm, 1);
+}
+
+void flush_cache_page(struct vm_area_struct *vma, unsigned long addr,
+		      unsigned long pfn)
+{
+	struct flusher_data data;
+
+	data.vma = vma;
+	data.addr1 = addr;
+	data.addr2 = pfn;
+
+	on_each_cpu(local_flush_cache_page, (void *)&data, 1);
+}
+
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
+		       unsigned long end)
+{
+	struct flusher_data data;
+
+	data.vma = vma;
+	data.addr1 = start;
+	data.addr2 = end;
+
+	on_each_cpu(local_flush_cache_range, (void *)&data, 1);
+}
+
+void flush_dcache_page(struct page *page)
+{
+	on_each_cpu(local_flush_dcache_page, page, 1);
+}
+
+void flush_icache_range(unsigned long start, unsigned long end)
+{
+	struct flusher_data data;
+
+	data.vma = NULL;
+	data.addr1 = start;
+	data.addr2 = end;
+
+	on_each_cpu(local_flush_icache_range, (void *)&data, 1);
+}
+
+void flush_icache_page(struct vm_area_struct *vma, struct page *page)
+{
+	/* Nothing uses the VMA, so just pass the struct page along */
+	on_each_cpu(local_flush_icache_page, page, 1);
+}
+
+void flush_cache_sigtramp(unsigned long address)
+{
+	on_each_cpu(local_flush_cache_sigtramp, (void *)address, 1);
+}
+
+static void compute_alias(struct cache_info *c)
+{
+	c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1);
+	c->n_aliases = c->alias_mask ? (c->alias_mask >> PAGE_SHIFT) + 1 : 0;
+}
+
+static void __init emit_cache_params(void)
+{
+	printk(KERN_NOTICE "I-cache : n_ways=%d n_sets=%d way_incr=%d\n",
+		boot_cpu_data.icache.ways,
+		boot_cpu_data.icache.sets,
+		boot_cpu_data.icache.way_incr);
+	printk(KERN_NOTICE "I-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
+		boot_cpu_data.icache.entry_mask,
+		boot_cpu_data.icache.alias_mask,
+		boot_cpu_data.icache.n_aliases);
+	printk(KERN_NOTICE "D-cache : n_ways=%d n_sets=%d way_incr=%d\n",
+		boot_cpu_data.dcache.ways,
+		boot_cpu_data.dcache.sets,
+		boot_cpu_data.dcache.way_incr);
+	printk(KERN_NOTICE "D-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
+		boot_cpu_data.dcache.entry_mask,
+		boot_cpu_data.dcache.alias_mask,
+		boot_cpu_data.dcache.n_aliases);
+
+	/*
+	 * Emit Secondary Cache parameters if the CPU has a probed L2.
+	 */
+	if (boot_cpu_data.flags & CPU_HAS_L2_CACHE) {
+		printk(KERN_NOTICE "S-cache : n_ways=%d n_sets=%d way_incr=%d\n",
+			boot_cpu_data.scache.ways,
+			boot_cpu_data.scache.sets,
+			boot_cpu_data.scache.way_incr);
+		printk(KERN_NOTICE "S-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
+			boot_cpu_data.scache.entry_mask,
+			boot_cpu_data.scache.alias_mask,
+			boot_cpu_data.scache.n_aliases);
+	}
+}
+
+void __init cpu_cache_init(void)
+{
+	compute_alias(&boot_cpu_data.icache);
+	compute_alias(&boot_cpu_data.dcache);
+	compute_alias(&boot_cpu_data.scache);
+
+	__flush_wback_region		= noop__flush_region;
+	__flush_purge_region		= noop__flush_region;
+	__flush_invalidate_region	= noop__flush_region;
+
+	if (boot_cpu_data.family == CPU_FAMILY_SH2) {
+		extern void __weak sh2_cache_init(void);
+
+		sh2_cache_init();
+	}
+
+	if (boot_cpu_data.family == CPU_FAMILY_SH2A) {
+		extern void __weak sh2a_cache_init(void);
+
+		sh2a_cache_init();
+	}
+
+	if (boot_cpu_data.family == CPU_FAMILY_SH3) {
+		extern void __weak sh3_cache_init(void);
+
+		sh3_cache_init();
+
+		if ((boot_cpu_data.type == CPU_SH7705) &&
+		    (boot_cpu_data.dcache.sets == 512)) {
+			extern void __weak sh7705_cache_init(void);
+
+			sh7705_cache_init();
+		}
+	}
+
+	if ((boot_cpu_data.family == CPU_FAMILY_SH4) ||
+	    (boot_cpu_data.family == CPU_FAMILY_SH4A) ||
+	    (boot_cpu_data.family == CPU_FAMILY_SH4AL_DSP)) {
+		extern void __weak sh4_cache_init(void);
+
+		sh4_cache_init();
+	}
+
+	if (boot_cpu_data.family == CPU_FAMILY_SH5) {
+		extern void __weak sh5_cache_init(void);
+
+		sh5_cache_init();
+	}
+
+	emit_cache_params();
+}
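
To make compute_alias() above concrete, a worked example (the cache geometry here is assumed for illustration, not taken from the patch): with a 16KB direct-mapped D-cache built from 512 sets of 32-byte lines (entry_shift = 5) and 4KB pages,

	alias_mask = ((512 - 1) << 5) & ~(4096 - 1)
	           = 0x3fe0 & ~0xfff
	           = 0x3000
	n_aliases  = (0x3000 >> PAGE_SHIFT) + 1 = 4

so up to four virtual colours can map the same physical line, which is why the copy/clear helpers and __update_cache() in this file key their aliasing work off boot_cpu_data.dcache.n_aliases.
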
diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
index dbbdeba..f1c93c8 100644
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -318,16 +318,15 @@
 /*
  * Called with interrupts disabled.
  */
-asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
-					 unsigned long writeaccess,
-					 unsigned long address)
+asmlinkage int __kprobes
+handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess,
+	       unsigned long address)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	pte_t entry;
-	int ret = 1;
 
 	/*
 	 * We don't take page faults for P1, P2, and parts of P4, these
@@ -338,40 +337,41 @@
 		pgd = pgd_offset_k(address);
 	} else {
 		if (unlikely(address >= TASK_SIZE || !current->mm))
-			goto out;
+			return 1;
 
 		pgd = pgd_offset(current->mm, address);
 	}
 
 	pud = pud_offset(pgd, address);
 	if (pud_none_or_clear_bad(pud))
-		goto out;
+		return 1;
 	pmd = pmd_offset(pud, address);
 	if (pmd_none_or_clear_bad(pmd))
-		goto out;
+		return 1;
 	pte = pte_offset_kernel(pmd, address);
 	entry = *pte;
 	if (unlikely(pte_none(entry) || pte_not_present(entry)))
-		goto out;
+		return 1;
 	if (unlikely(writeaccess && !pte_write(entry)))
-		goto out;
+		return 1;
 
 	if (writeaccess)
 		entry = pte_mkdirty(entry);
 	entry = pte_mkyoung(entry);
 
+	set_pte(pte, entry);
+
 #if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP)
 	/*
-	 * ITLB is not affected by "ldtlb" instruction.
-	 * So, we need to flush the entry by ourselves.
+	 * SH-4 does not set MMUCR.RC to the corresponding TLB entry in
+	 * the case of an initial page write exception, so we need to
+	 * flush it in order to avoid potential TLB entry duplication.
 	 */
-	local_flush_tlb_one(get_asid(), address & PAGE_MASK);
+	if (writeaccess == 2)
+		local_flush_tlb_one(get_asid(), address & PAGE_MASK);
 #endif
 
-	set_pte(pte, entry);
 	update_mmu_cache(NULL, address, entry);
 
-	ret = 0;
-out:
-	return ret;
+	return 0;
 }
diff --git a/arch/sh/mm/fault_64.c b/arch/sh/mm/fault_64.c
index bd63b96..2b356ce 100644
--- a/arch/sh/mm/fault_64.c
+++ b/arch/sh/mm/fault_64.c
@@ -56,16 +56,7 @@
 	/*
 	 * Set PTEH register
 	 */
-	pteh = address & MMU_VPN_MASK;
-
-	/* Sign extend based on neff. */
-#if (NEFF == 32)
-	/* Faster sign extension */
-	pteh = (unsigned long long)(signed long long)(signed long)pteh;
-#else
-	/* General case */
-	pteh = (pteh & NEFF_SIGN) ? (pteh | NEFF_MASK) : pteh;
-#endif
+	pteh = neff_sign_extend(address & MMU_VPN_MASK);
 
 	/* Set the ASID. */
 	pteh |= get_asid() << PTEH_ASID_SHIFT;
diff --git a/arch/sh/mm/flush-sh4.c b/arch/sh/mm/flush-sh4.c
new file mode 100644
index 0000000..cef4026
--- /dev/null
+++ b/arch/sh/mm/flush-sh4.c
@@ -0,0 +1,108 @@
+#include <linux/mm.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+/*
+ * Write back the dirty D-caches, but do not invalidate them.
+ *
+ * START: Virtual Address (U0, P1, or P3)
+ * SIZE: Size of the region.
+ */
+static void sh4__flush_wback_region(void *start, int size)
+{
+	reg_size_t aligned_start, v, cnt, end;
+
+	aligned_start = register_align(start);
+	v = aligned_start & ~(L1_CACHE_BYTES-1);
+	end = (aligned_start + size + L1_CACHE_BYTES-1)
+		& ~(L1_CACHE_BYTES-1);
+	cnt = (end - v) / L1_CACHE_BYTES;
+
+	while (cnt >= 8) {
+		__ocbwb(v); v += L1_CACHE_BYTES;
+		__ocbwb(v); v += L1_CACHE_BYTES;
+		__ocbwb(v); v += L1_CACHE_BYTES;
+		__ocbwb(v); v += L1_CACHE_BYTES;
+		__ocbwb(v); v += L1_CACHE_BYTES;
+		__ocbwb(v); v += L1_CACHE_BYTES;
+		__ocbwb(v); v += L1_CACHE_BYTES;
+		__ocbwb(v); v += L1_CACHE_BYTES;
+		cnt -= 8;
+	}
+
+	while (cnt) {
+		__ocbwb(v); v += L1_CACHE_BYTES;
+		cnt--;
+	}
+}
+
+/*
+ * Write back the dirty D-caches and invalidate them.
+ *
+ * START: Virtual Address (U0, P1, or P3)
+ * SIZE: Size of the region.
+ */
+static void sh4__flush_purge_region(void *start, int size)
+{
+	reg_size_t aligned_start, v, cnt, end;
+
+	aligned_start = register_align(start);
+	v = aligned_start & ~(L1_CACHE_BYTES-1);
+	end = (aligned_start + size + L1_CACHE_BYTES-1)
+		& ~(L1_CACHE_BYTES-1);
+	cnt = (end - v) / L1_CACHE_BYTES;
+
+	while (cnt >= 8) {
+		__ocbp(v); v += L1_CACHE_BYTES;
+		__ocbp(v); v += L1_CACHE_BYTES;
+		__ocbp(v); v += L1_CACHE_BYTES;
+		__ocbp(v); v += L1_CACHE_BYTES;
+		__ocbp(v); v += L1_CACHE_BYTES;
+		__ocbp(v); v += L1_CACHE_BYTES;
+		__ocbp(v); v += L1_CACHE_BYTES;
+		__ocbp(v); v += L1_CACHE_BYTES;
+		cnt -= 8;
+	}
+	while (cnt) {
+		__ocbp(v); v += L1_CACHE_BYTES;
+		cnt--;
+	}
+}
+
+/*
+ * No write back please
+ */
+static void sh4__flush_invalidate_region(void *start, int size)
+{
+	reg_size_t aligned_start, v, cnt, end;
+
+	aligned_start = register_align(start);
+	v = aligned_start & ~(L1_CACHE_BYTES-1);
+	end = (aligned_start + size + L1_CACHE_BYTES-1)
+		& ~(L1_CACHE_BYTES-1);
+	cnt = (end - v) / L1_CACHE_BYTES;
+
+	while (cnt >= 8) {
+		__ocbi(v); v += L1_CACHE_BYTES;
+		__ocbi(v); v += L1_CACHE_BYTES;
+		__ocbi(v); v += L1_CACHE_BYTES;
+		__ocbi(v); v += L1_CACHE_BYTES;
+		__ocbi(v); v += L1_CACHE_BYTES;
+		__ocbi(v); v += L1_CACHE_BYTES;
+		__ocbi(v); v += L1_CACHE_BYTES;
+		__ocbi(v); v += L1_CACHE_BYTES;
+		cnt -= 8;
+	}
+
+	while (cnt) {
+		__ocbi(v); v += L1_CACHE_BYTES;
+		cnt--;
+	}
+}
+
+void __init sh4__flush_region_init(void)
+{
+	__flush_wback_region		= sh4__flush_wback_region;
+	__flush_invalidate_region	= sh4__flush_invalidate_region;
+	__flush_purge_region		= sh4__flush_purge_region;
+}
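
The three region flushers above are the same loop unrolled eight times around different opcodes (ocbwb, ocbp, ocbi); a non-unrolled equivalent of the write-back variant, for reference only, would be:

	/* Reference sketch, behaviourally equivalent to sh4__flush_wback_region(). */
	static void wback_region_simple(void *start, int size)
	{
		reg_size_t aligned_start = register_align(start);
		reg_size_t v   = aligned_start & ~(L1_CACHE_BYTES - 1);
		reg_size_t end = (aligned_start + size + L1_CACHE_BYTES - 1)
				 & ~(L1_CACHE_BYTES - 1);

		for (; v < end; v += L1_CACHE_BYTES)
			__ocbwb(v);	/* ocbp/ocbi for the purge/invalidate variants */
	}

The manual unrolling just amortises the loop overhead over eight cache-line operations per iteration.
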
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index fe532ae..0a9b4d8 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -210,6 +210,9 @@
 			high_memory = node_high_memory;
 	}
 
+	/* Set this up early, so we can take care of the zero page */
+	cpu_cache_init();
+
 	/* clear the zero-page */
 	memset(empty_zero_page, 0, PAGE_SIZE);
 	__flush_wback_region(empty_zero_page, PAGE_SIZE);
@@ -230,8 +233,6 @@
 		datasize >> 10,
 		initsize >> 10);
 
-	p3_cache_init();
-
 	/* Initialize the vDSO */
 	vsyscall_init();
 }
diff --git a/arch/sh/mm/kmap.c b/arch/sh/mm/kmap.c
new file mode 100644
index 0000000..3eecf0d
--- /dev/null
+++ b/arch/sh/mm/kmap.c
@@ -0,0 +1,64 @@
+/*
+ * arch/sh/mm/kmap.c
+ *
+ * Copyright (C) 1999, 2000, 2002  Niibe Yutaka
+ * Copyright (C) 2002 - 2009  Paul Mundt
+ *
+ * Released under the terms of the GNU GPL v2.0.
+ */
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/fs.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+#define kmap_get_fixmap_pte(vaddr)                                     \
+	pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr))
+
+static pte_t *kmap_coherent_pte;
+
+void __init kmap_coherent_init(void)
+{
+	unsigned long vaddr;
+
+	if (!boot_cpu_data.dcache.n_aliases)
+		return;
+
+	/* cache the first coherent kmap pte */
+	vaddr = __fix_to_virt(FIX_CMAP_BEGIN);
+	kmap_coherent_pte = kmap_get_fixmap_pte(vaddr);
+}
+
+void *kmap_coherent(struct page *page, unsigned long addr)
+{
+	enum fixed_addresses idx;
+	unsigned long vaddr, flags;
+	pte_t pte;
+
+	BUG_ON(test_bit(PG_dcache_dirty, &page->flags));
+
+	inc_preempt_count();
+
+	idx = (addr & current_cpu_data.dcache.alias_mask) >> PAGE_SHIFT;
+	vaddr = __fix_to_virt(FIX_CMAP_END - idx);
+	pte = mk_pte(page, PAGE_KERNEL);
+
+	local_irq_save(flags);
+	flush_tlb_one(get_asid(), vaddr);
+	local_irq_restore(flags);
+
+	update_mmu_cache(NULL, vaddr, pte);
+
+	set_pte(kmap_coherent_pte - (FIX_CMAP_END - idx), pte);
+
+	return (void *)vaddr;
+}
+
+void kunmap_coherent(void)
+{
+	dec_preempt_count();
+	preempt_check_resched();
+}
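
A short worked example of the colour selection in kmap_coherent() (the mask value is assumed, matching the 4-colour D-cache example earlier): with dcache.alias_mask == 0x3000 and a user address addr == 0x40002345,

	idx   = (0x40002345 & 0x3000) >> PAGE_SHIFT   /* = 2 */
	vaddr = __fix_to_virt(FIX_CMAP_END - 2)

so the temporary kernel window lands on the same cache colour as the user mapping, and data written through it hits the lines the user-space alias will see.
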
diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c
index 1b5fdfb..d2984fa 100644
--- a/arch/sh/mm/mmap.c
+++ b/arch/sh/mm/mmap.c
@@ -14,10 +14,10 @@
 #include <asm/page.h>
 #include <asm/processor.h>
 
-#ifdef CONFIG_MMU
 unsigned long shm_align_mask = PAGE_SIZE - 1;	/* Sane caches */
 EXPORT_SYMBOL(shm_align_mask);
 
+#ifdef CONFIG_MMU
 /*
  * To avoid cache aliases, we map the shared page with same color.
  */
diff --git a/arch/sh/mm/nommu.c b/arch/sh/mm/nommu.c
new file mode 100644
index 0000000..51b5403
--- /dev/null
+++ b/arch/sh/mm/nommu.c
@@ -0,0 +1,96 @@
+/*
+ * arch/sh/mm/nommu.c
+ *
+ * Various helper routines and stubs for MMUless SH.
+ *
+ * Copyright (C) 2002 - 2009 Paul Mundt
+ *
+ * Released under the terms of the GNU GPL v2.0.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/page.h>
+#include <asm/uaccess.h>
+
+/*
+ * Nothing too terribly exciting here ..
+ */
+void copy_page(void *to, void *from)
+{
+	memcpy(to, from, PAGE_SIZE);
+}
+
+__kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n)
+{
+	memcpy(to, from, n);
+	return 0;
+}
+
+__kernel_size_t __clear_user(void *to, __kernel_size_t n)
+{
+	memset(to, 0, n);
+	return 0;
+}
+
+void local_flush_tlb_all(void)
+{
+	BUG();
+}
+
+void local_flush_tlb_mm(struct mm_struct *mm)
+{
+	BUG();
+}
+
+void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			    unsigned long end)
+{
+	BUG();
+}
+
+void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	BUG();
+}
+
+void local_flush_tlb_one(unsigned long asid, unsigned long page)
+{
+	BUG();
+}
+
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	BUG();
+}
+
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
+{
+}
+
+void __init kmap_coherent_init(void)
+{
+}
+
+void *kmap_coherent(struct page *page, unsigned long addr)
+{
+	BUG();
+	return NULL;
+}
+
+void kunmap_coherent(void)
+{
+	BUG();
+}
+
+void __init page_table_range_init(unsigned long start, unsigned long end,
+				  pgd_t *pgd_base)
+{
+}
+
+void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
+{
+}
diff --git a/arch/sh/mm/pg-nommu.c b/arch/sh/mm/pg-nommu.c
deleted file mode 100644
index 91ed4e6..0000000
--- a/arch/sh/mm/pg-nommu.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * arch/sh/mm/pg-nommu.c
- *
- * clear_page()/copy_page() implementation for MMUless SH.
- *
- * Copyright (C) 2003  Paul Mundt
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <asm/page.h>
-#include <asm/uaccess.h>
-
-void copy_page(void *to, void *from)
-{
-	memcpy(to, from, PAGE_SIZE);
-}
-
-void clear_page(void *to)
-{
-	memset(to, 0, PAGE_SIZE);
-}
-
-__kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n)
-{
-	memcpy(to, from, n);
-	return 0;
-}
-
-__kernel_size_t __clear_user(void *to, __kernel_size_t n)
-{
-	memset(to, 0, n);
-	return 0;
-}
diff --git a/arch/sh/mm/pg-sh4.c b/arch/sh/mm/pg-sh4.c
deleted file mode 100644
index 2fe14da..0000000
--- a/arch/sh/mm/pg-sh4.c
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * arch/sh/mm/pg-sh4.c
- *
- * Copyright (C) 1999, 2000, 2002  Niibe Yutaka
- * Copyright (C) 2002 - 2007  Paul Mundt
- *
- * Released under the terms of the GNU GPL v2.0.
- */
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/mutex.h>
-#include <linux/fs.h>
-#include <linux/highmem.h>
-#include <linux/module.h>
-#include <asm/mmu_context.h>
-#include <asm/cacheflush.h>
-
-#define CACHE_ALIAS (current_cpu_data.dcache.alias_mask)
-
-#define kmap_get_fixmap_pte(vaddr)                                     \
-	pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr))
-
-static pte_t *kmap_coherent_pte;
-
-void __init kmap_coherent_init(void)
-{
-	unsigned long vaddr;
-
-	/* cache the first coherent kmap pte */
-	vaddr = __fix_to_virt(FIX_CMAP_BEGIN);
-	kmap_coherent_pte = kmap_get_fixmap_pte(vaddr);
-}
-
-static inline void *kmap_coherent(struct page *page, unsigned long addr)
-{
-	enum fixed_addresses idx;
-	unsigned long vaddr, flags;
-	pte_t pte;
-
-	inc_preempt_count();
-
-	idx = (addr & current_cpu_data.dcache.alias_mask) >> PAGE_SHIFT;
-	vaddr = __fix_to_virt(FIX_CMAP_END - idx);
-	pte = mk_pte(page, PAGE_KERNEL);
-
-	local_irq_save(flags);
-	flush_tlb_one(get_asid(), vaddr);
-	local_irq_restore(flags);
-
-	update_mmu_cache(NULL, vaddr, pte);
-
-	set_pte(kmap_coherent_pte - (FIX_CMAP_END - idx), pte);
-
-	return (void *)vaddr;
-}
-
-static inline void kunmap_coherent(struct page *page)
-{
-	dec_preempt_count();
-	preempt_check_resched();
-}
-
-/*
- * clear_user_page
- * @to: P1 address
- * @address: U0 address to be mapped
- * @page: page (virt_to_page(to))
- */
-void clear_user_page(void *to, unsigned long address, struct page *page)
-{
-	__set_bit(PG_mapped, &page->flags);
-
-	clear_page(to);
-	if ((((address & PAGE_MASK) ^ (unsigned long)to) & CACHE_ALIAS))
-		__flush_wback_region(to, PAGE_SIZE);
-}
-
-void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
-		       unsigned long vaddr, void *dst, const void *src,
-		       unsigned long len)
-{
-	void *vto;
-
-	__set_bit(PG_mapped, &page->flags);
-
-	vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
-	memcpy(vto, src, len);
-	kunmap_coherent(vto);
-
-	if (vma->vm_flags & VM_EXEC)
-		flush_cache_page(vma, vaddr, page_to_pfn(page));
-}
-
-void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
-			 unsigned long vaddr, void *dst, const void *src,
-			 unsigned long len)
-{
-	void *vfrom;
-
-	__set_bit(PG_mapped, &page->flags);
-
-	vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
-	memcpy(dst, vfrom, len);
-	kunmap_coherent(vfrom);
-}
-
-void copy_user_highpage(struct page *to, struct page *from,
-			unsigned long vaddr, struct vm_area_struct *vma)
-{
-	void *vfrom, *vto;
-
-	__set_bit(PG_mapped, &to->flags);
-
-	vto = kmap_atomic(to, KM_USER1);
-	vfrom = kmap_coherent(from, vaddr);
-	copy_page(vto, vfrom);
-	kunmap_coherent(vfrom);
-
-	if (((vaddr ^ (unsigned long)vto) & CACHE_ALIAS))
-		__flush_wback_region(vto, PAGE_SIZE);
-
-	kunmap_atomic(vto, KM_USER1);
-	/* Make sure this page is cleared on other CPU's too before using it */
-	smp_wmb();
-}
-EXPORT_SYMBOL(copy_user_highpage);
-
-/*
- * For SH-4, we have our own implementation for ptep_get_and_clear
- */
-pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
-	pte_t pte = *ptep;
-
-	pte_clear(mm, addr, ptep);
-	if (!pte_not_present(pte)) {
-		unsigned long pfn = pte_pfn(pte);
-		if (pfn_valid(pfn)) {
-			struct page *page = pfn_to_page(pfn);
-			struct address_space *mapping = page_mapping(page);
-			if (!mapping || !mapping_writably_mapped(mapping))
-				__clear_bit(PG_mapped, &page->flags);
-		}
-	}
-	return pte;
-}
diff --git a/arch/sh/mm/pg-sh7705.c b/arch/sh/mm/pg-sh7705.c
deleted file mode 100644
index eaf2514..0000000
--- a/arch/sh/mm/pg-sh7705.c
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * arch/sh/mm/pg-sh7705.c
- *
- * Copyright (C) 1999, 2000  Niibe Yutaka
- * Copyright (C) 2004  Alex Song
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- */
-
-#include <linux/init.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/threads.h>
-#include <linux/fs.h>
-#include <asm/addrspace.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/cache.h>
-#include <asm/io.h>
-#include <asm/uaccess.h>
-#include <asm/pgalloc.h>
-#include <asm/mmu_context.h>
-#include <asm/cacheflush.h>
-
-static inline void __flush_purge_virtual_region(void *p1, void *virt, int size)
-{
-	unsigned long v;
-	unsigned long begin, end;
-	unsigned long p1_begin;
-
-
-	begin = L1_CACHE_ALIGN((unsigned long)virt);
-	end = L1_CACHE_ALIGN((unsigned long)virt + size);
-
-	p1_begin = (unsigned long)p1 & ~(L1_CACHE_BYTES - 1);
-
-	/* do this the slow way as we may not have TLB entries
-	 * for virt yet. */
-	for (v = begin; v < end; v += L1_CACHE_BYTES) {
-		unsigned long p;
-	        unsigned long ways, addr;
-
-		p = __pa(p1_begin);
-
-	        ways = current_cpu_data.dcache.ways;
-		addr = CACHE_OC_ADDRESS_ARRAY;
-
-		do {
-			unsigned long data;
-
-			addr |= (v & current_cpu_data.dcache.entry_mask);
-
-			data = ctrl_inl(addr);
-			if ((data & CACHE_PHYSADDR_MASK) ==
-			       (p & CACHE_PHYSADDR_MASK)) {
-				data &= ~(SH_CACHE_UPDATED|SH_CACHE_VALID);
-				ctrl_outl(data, addr);
-			}
-
-			addr += current_cpu_data.dcache.way_incr;
-		} while (--ways);
-
-		p1_begin += L1_CACHE_BYTES;
-	}
-}
-
-/*
- * clear_user_page
- * @to: P1 address
- * @address: U0 address to be mapped
- */
-void clear_user_page(void *to, unsigned long address, struct page *pg)
-{
-	struct page *page = virt_to_page(to);
-
-	__set_bit(PG_mapped, &page->flags);
-	if (((address ^ (unsigned long)to) & CACHE_ALIAS) == 0) {
-		clear_page(to);
-		__flush_wback_region(to, PAGE_SIZE);
-	} else {
-		__flush_purge_virtual_region(to,
-					     (void *)(address & 0xfffff000),
-					     PAGE_SIZE);
-		clear_page(to);
-		__flush_wback_region(to, PAGE_SIZE);
-	}
-}
-
-/*
- * copy_user_page
- * @to: P1 address
- * @from: P1 address
- * @address: U0 address to be mapped
- */
-void copy_user_page(void *to, void *from, unsigned long address, struct page *pg)
-{
-	struct page *page = virt_to_page(to);
-
-
-	__set_bit(PG_mapped, &page->flags);
-	if (((address ^ (unsigned long)to) & CACHE_ALIAS) == 0) {
-		copy_page(to, from);
-		__flush_wback_region(to, PAGE_SIZE);
-	} else {
-		__flush_purge_virtual_region(to,
-					     (void *)(address & 0xfffff000),
-					     PAGE_SIZE);
-		copy_page(to, from);
-		__flush_wback_region(to, PAGE_SIZE);
-	}
-}
-
-/*
- * For SH7705, we have our own implementation for ptep_get_and_clear
- * Copied from pg-sh4.c
- */
-pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
-	pte_t pte = *ptep;
-
-	pte_clear(mm, addr, ptep);
-	if (!pte_not_present(pte)) {
-		unsigned long pfn = pte_pfn(pte);
-		if (pfn_valid(pfn)) {
-			struct page *page = pfn_to_page(pfn);
-			struct address_space *mapping = page_mapping(page);
-			if (!mapping || !mapping_writably_mapped(mapping))
-				__clear_bit(PG_mapped, &page->flags);
-		}
-	}
-
-	return pte;
-}
-
diff --git a/arch/sh/mm/tlb-nommu.c b/arch/sh/mm/tlb-nommu.c
deleted file mode 100644
index 71c742b..0000000
--- a/arch/sh/mm/tlb-nommu.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * arch/sh/mm/tlb-nommu.c
- *
- * TLB Operations for MMUless SH.
- *
- * Copyright (C) 2002 Paul Mundt
- *
- * Released under the terms of the GNU GPL v2.0.
- */
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-
-/*
- * Nothing too terribly exciting here ..
- */
-void local_flush_tlb_all(void)
-{
-	BUG();
-}
-
-void local_flush_tlb_mm(struct mm_struct *mm)
-{
-	BUG();
-}
-
-void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
-			    unsigned long end)
-{
-	BUG();
-}
-
-void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
-{
-	BUG();
-}
-
-void local_flush_tlb_one(unsigned long asid, unsigned long page)
-{
-	BUG();
-}
-
-void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
-{
-	BUG();
-}
-
-void update_mmu_cache(struct vm_area_struct * vma,
-		      unsigned long address, pte_t pte)
-{
-	BUG();
-}
-
-void __init page_table_range_init(unsigned long start, unsigned long end,
-				  pgd_t *pgd_base)
-{
-}
-
-void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
-{
-}
diff --git a/arch/sh/mm/tlb-pteaex.c b/arch/sh/mm/tlb-pteaex.c
index 2aab3ea..409b7c2 100644
--- a/arch/sh/mm/tlb-pteaex.c
+++ b/arch/sh/mm/tlb-pteaex.c
@@ -16,34 +16,16 @@
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
-void update_mmu_cache(struct vm_area_struct * vma,
-		      unsigned long address, pte_t pte)
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
 {
-	unsigned long flags;
-	unsigned long pteval;
-	unsigned long vpn;
+	unsigned long flags, pteval, vpn;
 
-	/* Ptrace may call this routine. */
+	/*
+	 * Handle the debugger faulting in for the debuggee.
+	 */
 	if (vma && current->active_mm != vma->vm_mm)
 		return;
 
-#ifndef CONFIG_CACHE_OFF
-	{
-		unsigned long pfn = pte_pfn(pte);
-
-		if (pfn_valid(pfn)) {
-			struct page *page = pfn_to_page(pfn);
-
-			if (!test_bit(PG_mapped, &page->flags)) {
-				unsigned long phys = pte_val(pte) & PTE_PHYS_MASK;
-				__flush_wback_region((void *)P1SEGADDR(phys),
-						     PAGE_SIZE);
-				__set_bit(PG_mapped, &page->flags);
-			}
-		}
-	}
-#endif
-
 	local_irq_save(flags);
 
 	/* Set PTEH register */
diff --git a/arch/sh/mm/tlb-sh3.c b/arch/sh/mm/tlb-sh3.c
index 17cb7c3..ace8e6d 100644
--- a/arch/sh/mm/tlb-sh3.c
+++ b/arch/sh/mm/tlb-sh3.c
@@ -27,32 +27,16 @@
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
-void update_mmu_cache(struct vm_area_struct * vma,
-		      unsigned long address, pte_t pte)
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
 {
-	unsigned long flags;
-	unsigned long pteval;
-	unsigned long vpn;
+	unsigned long flags, pteval, vpn;
 
-	/* Ptrace may call this routine. */
+	/*
+	 * Handle the debugger faulting in for the debuggee.
+	 */
 	if (vma && current->active_mm != vma->vm_mm)
 		return;
 
-#if defined(CONFIG_SH7705_CACHE_32KB)
-	{
-		struct page *page = pte_page(pte);
-		unsigned long pfn = pte_pfn(pte);
-
-		if (pfn_valid(pfn) && !test_bit(PG_mapped, &page->flags)) {
-			unsigned long phys = pte_val(pte) & PTE_PHYS_MASK;
-
-			__flush_wback_region((void *)P1SEGADDR(phys),
-					     PAGE_SIZE);
-			__set_bit(PG_mapped, &page->flags);
-		}
-	}
-#endif
-
 	local_irq_save(flags);
 
 	/* Set PTEH register */
@@ -93,4 +77,3 @@
 	for (i = 0; i < ways; i++)
 		ctrl_outl(data, addr + (i << 8));
 }
-
diff --git a/arch/sh/mm/tlb-sh4.c b/arch/sh/mm/tlb-sh4.c
index fd0d11f..8cf550e 100644
--- a/arch/sh/mm/tlb-sh4.c
+++ b/arch/sh/mm/tlb-sh4.c
@@ -15,34 +15,16 @@
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
-void update_mmu_cache(struct vm_area_struct * vma,
-		      unsigned long address, pte_t pte)
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
 {
-	unsigned long flags;
-	unsigned long pteval;
-	unsigned long vpn;
+	unsigned long flags, pteval, vpn;
 
-	/* Ptrace may call this routine. */
+	/*
+	 * Handle the debugger faulting in for the debuggee.
+	 */
 	if (vma && current->active_mm != vma->vm_mm)
 		return;
 
-#ifndef CONFIG_CACHE_OFF
-	{
-		unsigned long pfn = pte_pfn(pte);
-
-		if (pfn_valid(pfn)) {
-			struct page *page = pfn_to_page(pfn);
-
-			if (!test_bit(PG_mapped, &page->flags)) {
-				unsigned long phys = pte_val(pte) & PTE_PHYS_MASK;
-				__flush_wback_region((void *)P1SEGADDR(phys),
-						     PAGE_SIZE);
-				__set_bit(PG_mapped, &page->flags);
-			}
-		}
-	}
-#endif
-
 	local_irq_save(flags);
 
 	/* Set PTEH register */
diff --git a/arch/sh/mm/tlb-sh5.c b/arch/sh/mm/tlb-sh5.c
index dae1312..fdb64e4 100644
--- a/arch/sh/mm/tlb-sh5.c
+++ b/arch/sh/mm/tlb-sh5.c
@@ -117,26 +117,15 @@
  * Load up a virtual<->physical translation for @eaddr<->@paddr in the
  * pre-allocated TLB slot @config_addr (see sh64_get_wired_dtlb_entry).
  */
-inline void sh64_setup_tlb_slot(unsigned long long config_addr,
-				unsigned long eaddr,
-				unsigned long asid,
-				unsigned long paddr)
+void sh64_setup_tlb_slot(unsigned long long config_addr, unsigned long eaddr,
+			 unsigned long asid, unsigned long paddr)
 {
 	unsigned long long pteh, ptel;
 
-	/* Sign extension */
-#if (NEFF == 32)
-	pteh = (unsigned long long)(signed long long)(signed long) eaddr;
-#else
-#error "Can't sign extend more than 32 bits yet"
-#endif
+	pteh = neff_sign_extend(eaddr);
 	pteh &= PAGE_MASK;
 	pteh |= (asid << PTEH_ASID_SHIFT) | PTEH_VALID;
-#if (NEFF == 32)
-	ptel = (unsigned long long)(signed long long)(signed long) paddr;
-#else
-#error "Can't sign extend more than 32 bits yet"
-#endif
+	ptel = neff_sign_extend(paddr);
 	ptel &= PAGE_MASK;
 	ptel |= (_PAGE_CACHABLE | _PAGE_READ | _PAGE_WRITE);
 
@@ -152,5 +141,5 @@
  *
  * Teardown any existing mapping in the TLB slot @config_addr.
  */
-inline void sh64_teardown_tlb_slot(unsigned long long config_addr)
+void sh64_teardown_tlb_slot(unsigned long long config_addr)
 	__attribute__ ((alias("__flush_tlb_slot")));
diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c
index 3ce40ea..2dcc485 100644
--- a/arch/sh/mm/tlbflush_64.c
+++ b/arch/sh/mm/tlbflush_64.c
@@ -329,22 +329,6 @@
 		goto no_context;
 }
 
-void update_mmu_cache(struct vm_area_struct * vma,
-			unsigned long address, pte_t pte)
-{
-	/*
-	 * This appears to get called once for every pte entry that gets
-	 * established => I don't think it's efficient to try refilling the
-	 * TLBs with the pages - some may not get accessed even.  Also, for
-	 * executable pages, it is impossible to determine reliably here which
-	 * TLB they should be mapped into (or both even).
-	 *
-	 * So, just do nothing here and handle faults on demand.  In the
-	 * TLBMISS handling case, the refill is now done anyway after the pte
-	 * has been fixed up, so that deals with most useful cases.
-	 */
-}
-
 void local_flush_tlb_one(unsigned long asid, unsigned long page)
 {
 	unsigned long long match, pteh=0, lpage;
@@ -353,7 +337,7 @@
 	/*
 	 * Sign-extend based on neff.
 	 */
-	lpage = (page & NEFF_SIGN) ? (page | NEFF_MASK) : page;
+	lpage = neff_sign_extend(page);
 	match = (asid << PTEH_ASID_SHIFT) | PTEH_VALID;
 	match |= lpage;
 
@@ -482,3 +466,7 @@
         /* FIXME: Optimize this later.. */
         flush_tlb_all();
 }
+
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
+{
+}