Merge branch 'master' into sh/cachetlb
diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h
index 4c5462d..5dffbd1 100644
--- a/arch/sh/include/asm/cacheflush.h
+++ b/arch/sh/include/asm/cacheflush.h
@@ -1,6 +1,8 @@
 #ifndef __ASM_SH_CACHEFLUSH_H
 #define __ASM_SH_CACHEFLUSH_H
 
+#include <linux/mm.h>
+
 #ifdef __KERNEL__
 
 #ifdef CONFIG_CACHE_OFF
@@ -43,13 +45,24 @@
 extern void __flush_invalidate_region(void *start, int size);
 #endif
 
+#ifdef CONFIG_MMU
+#define ARCH_HAS_FLUSH_ANON_PAGE
+extern void __flush_anon_page(struct page *page, unsigned long);
+
+static inline void flush_anon_page(struct vm_area_struct *vma,
+				   struct page *page, unsigned long vmaddr)
+{
+	if (boot_cpu_data.dcache.n_aliases && PageAnon(page))
+		__flush_anon_page(page, vmaddr);
+}
+#endif
+
 #define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
 static inline void flush_kernel_dcache_page(struct page *page)
 {
 	flush_dcache_page(page);
 }
 
-#if defined(CONFIG_CPU_SH4) && !defined(CONFIG_CACHE_OFF)
 extern void copy_to_user_page(struct vm_area_struct *vma,
 	struct page *page, unsigned long vaddr, void *dst, const void *src,
 	unsigned long len);
@@ -57,20 +70,6 @@
 extern void copy_from_user_page(struct vm_area_struct *vma,
 	struct page *page, unsigned long vaddr, void *dst, const void *src,
 	unsigned long len);
-#else
-#define copy_to_user_page(vma, page, vaddr, dst, src, len)	\
-	do {							\
-		flush_cache_page(vma, vaddr, page_to_pfn(page));\
-		memcpy(dst, src, len);				\
-		flush_icache_user_range(vma, page, vaddr, len);	\
-	} while (0)
-
-#define copy_from_user_page(vma, page, vaddr, dst, src, len)	\
-	do {							\
-		flush_cache_page(vma, vaddr, page_to_pfn(page));\
-		memcpy(dst, src, len);				\
-	} while (0)
-#endif
 
 #define flush_cache_vmap(start, end)		flush_cache_all()
 #define flush_cache_vunmap(start, end)		flush_cache_all()
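
Note: flush_anon_page() only matters on aliasing D-caches (dcache.n_aliases
!= 0) and only for anonymous pages, which have no address_space through which
flush_dcache_page() deferral can work. The generic VM (the follow_page() path
in mm/memory.c is the usual consumer) calls it before the kernel reads an
anonymous page through its own mapping, roughly:

	/* illustrative call pattern only; see mm/memory.c for the real site */
	flush_anon_page(vma, page, address);	/* write back the user alias */
	flush_dcache_page(page);
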
diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h
index 49592c7..847eeab 100644
--- a/arch/sh/include/asm/page.h
+++ b/arch/sh/include/asm/page.h
@@ -50,25 +50,35 @@
 extern unsigned long max_low_pfn, min_low_pfn;
 extern unsigned long memory_start, memory_end;
 
-extern void clear_page(void *to);
+static inline unsigned long
+pages_do_alias(unsigned long addr1, unsigned long addr2)
+{
+	return (addr1 ^ addr2) & shm_align_mask;
+}
+
+#define clear_page(page)	memset((void *)(page), 0, PAGE_SIZE)
 extern void copy_page(void *to, void *from);
 
-#if !defined(CONFIG_CACHE_OFF) && defined(CONFIG_MMU) && \
-	(defined(CONFIG_CPU_SH5) || defined(CONFIG_CPU_SH4) || \
-	 defined(CONFIG_SH7705_CACHE_32KB))
 struct page;
 struct vm_area_struct;
+
+#if defined(CONFIG_CPU_SH5)
 extern void clear_user_page(void *to, unsigned long address, struct page *page);
 extern void copy_user_page(void *to, void *from, unsigned long address,
 			   struct page *page);
-#if defined(CONFIG_CPU_SH4)
+
+#elif defined(CONFIG_MMU)
 extern void copy_user_highpage(struct page *to, struct page *from,
 			       unsigned long vaddr, struct vm_area_struct *vma);
 #define __HAVE_ARCH_COPY_USER_HIGHPAGE
-#endif
+extern void clear_user_highpage(struct page *page, unsigned long vaddr);
+#define clear_user_highpage	clear_user_highpage
+
 #else
+
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
+
 #endif
 
 /*
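
Note: pages_do_alias() becomes the central test for virtual aliasing: two
virtual addresses can land on different cache colours for the same physical
page iff they differ within shm_align_mask. A worked example, assuming for
illustration a 16KB cache way with 4KB pages, so shm_align_mask is 0x3fff
and the colour bits are 12-13:

	pages_do_alias(0x10001000, 0x10003000);	/* 0x2000: different colour,
						   a flush is needed       */
	pages_do_alias(0x10001000, 0x10005000);	/* 0: same colour, both
						   mappings hit the same
						   cache lines             */
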
diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h
index 2a011b1..3cd7127 100644
--- a/arch/sh/include/asm/pgtable.h
+++ b/arch/sh/include/asm/pgtable.h
@@ -36,6 +36,12 @@
 #define	NEFF_SIGN	(1LL << (NEFF - 1))
 #define	NEFF_MASK	(-1LL << NEFF)
 
+static inline unsigned long long neff_sign_extend(unsigned long val)
+{
+	unsigned long long extended = val;
+	return (extended & NEFF_SIGN) ? (extended | NEFF_MASK) : extended;
+}
+
 #ifdef CONFIG_29BIT
 #define NPHYS		29
 #else
@@ -133,22 +139,26 @@
  */
 #define pgtable_cache_init()	do { } while (0)
 
-#if !defined(CONFIG_CACHE_OFF) && (defined(CONFIG_CPU_SH4) || \
-	defined(CONFIG_SH7705_CACHE_32KB))
-struct mm_struct;
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
-#endif
-
 struct vm_area_struct;
-extern void update_mmu_cache(struct vm_area_struct * vma,
-			     unsigned long address, pte_t pte);
+
+extern void __update_cache(struct vm_area_struct *vma,
+			   unsigned long address, pte_t pte);
+extern void __update_tlb(struct vm_area_struct *vma,
+			 unsigned long address, pte_t pte);
+
+static inline void
+update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
+{
+	__update_cache(vma, address, pte);
+	__update_tlb(vma, address, pte);
+}
+
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 extern void paging_init(void);
 extern void page_table_range_init(unsigned long start, unsigned long end,
 				  pgd_t *pgd);
 
-#if !defined(CONFIG_CACHE_OFF) && defined(CONFIG_CPU_SH4) && defined(CONFIG_MMU)
+#if defined(CONFIG_MMU) && !defined(CONFIG_CPU_SH5)
 extern void kmap_coherent_init(void);
 #else
 #define kmap_coherent_init()	do { } while (0)
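
Note: neff_sign_extend() replaces the open-coded (val & NEFF_SIGN) ? (val |
NEFF_MASK) : val pattern scattered through the SH-5 code. With NEFF == 32
(the common case; the old tlb-sh5.c code #error'ed out on anything else),
NEFF_SIGN is 1LL << 31 and NEFF_MASK is -1LL << 32, so:

	neff_sign_extend(0x7fffffffUL);	/* == 0x000000007fffffffULL */
	neff_sign_extend(0x80000000UL);	/* == 0xffffffff80000000ULL */

which is exactly the (unsigned long long)(signed long long)(signed long)
cast chain it replaces.
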
diff --git a/arch/sh/include/asm/system_32.h b/arch/sh/include/asm/system_32.h
index 6c68a51..d7299d6 100644
--- a/arch/sh/include/asm/system_32.h
+++ b/arch/sh/include/asm/system_32.h
@@ -198,6 +198,11 @@
 })
 #endif
 
+static inline reg_size_t register_align(void *val)
+{
+	return (unsigned long)(signed long)val;
+}
+
 int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
 			    struct mem_access *ma);
 
diff --git a/arch/sh/include/asm/system_64.h b/arch/sh/include/asm/system_64.h
index 943acf5..218b54d 100644
--- a/arch/sh/include/asm/system_64.h
+++ b/arch/sh/include/asm/system_64.h
@@ -37,4 +37,9 @@
 #define jump_to_uncached()	do { } while (0)
 #define back_to_cached()	do { } while (0)
 
+static inline reg_size_t register_align(void *val)
+{
+	return (unsigned long long)(signed long long)(signed long)val;
+}
+
 #endif /* __ASM_SH_SYSTEM_64_H */
diff --git a/arch/sh/include/asm/types.h b/arch/sh/include/asm/types.h
index c7f3c94..f8421f7 100644
--- a/arch/sh/include/asm/types.h
+++ b/arch/sh/include/asm/types.h
@@ -11,8 +11,10 @@
 
 #ifdef CONFIG_SUPERH32
 typedef u16 insn_size_t;
+typedef u32 reg_size_t;
 #else
 typedef u32 insn_size_t;
+typedef u64 reg_size_t;
 #endif
 
 #endif /* __ASSEMBLY__ */
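
Note: reg_size_t gives the new generic flush code (flush-sh4.c below) an
integer type matching the native register width, and register_align() from
system_32.h/system_64.h above converts a pointer into it: an identity
conversion on 32-bit parts, a sign extension on SH-5 so the result is a
valid NEFF-extended effective address. For illustration:

	void *p = (void *)0xa0000000;	/* a P2 address */
	/* SUPERH32: register_align(p) == 0xa0000000         (u32) */
	/* SUPERH64: register_align(p) == 0xffffffffa0000000 (u64) */
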
diff --git a/arch/sh/include/cpu-sh3/cpu/cacheflush.h b/arch/sh/include/cpu-sh3/cpu/cacheflush.h
index 1ac27aa..6485ad5 100644
--- a/arch/sh/include/cpu-sh3/cpu/cacheflush.h
+++ b/arch/sh/include/cpu-sh3/cpu/cacheflush.h
@@ -15,10 +15,7 @@
  * SH4. Unlike the SH4 this is a unified cache so we need to do some work
  * in mmap when 'exec'ing a new binary
  */
- /* 32KB cache, 4kb PAGE sizes need to check bit 12 */
-#define CACHE_ALIAS 0x00001000
-
-#define PG_mapped	PG_arch_1
+#define PG_dcache_dirty	PG_arch_1
 
 void flush_cache_all(void);
 void flush_cache_mm(struct mm_struct *mm);
diff --git a/arch/sh/include/cpu-sh4/cpu/cacheflush.h b/arch/sh/include/cpu-sh4/cpu/cacheflush.h
index 065306d..3564f17 100644
--- a/arch/sh/include/cpu-sh4/cpu/cacheflush.h
+++ b/arch/sh/include/cpu-sh4/cpu/cacheflush.h
@@ -38,6 +38,6 @@
 /* Initialization of P3 area for copy_user_page */
 void p3_cache_init(void);
 
-#define PG_mapped	PG_arch_1
+#define PG_dcache_dirty	PG_arch_1
 
 #endif /* __ASM_CPU_SH4_CACHEFLUSH_H */
diff --git a/arch/sh/kernel/cpu/sh3/entry.S b/arch/sh/kernel/cpu/sh3/entry.S
index 67ad646..854921c 100644
--- a/arch/sh/kernel/cpu/sh3/entry.S
+++ b/arch/sh/kernel/cpu/sh3/entry.S
@@ -113,35 +113,34 @@
 #if defined(CONFIG_MMU)
 	.align	2
 ENTRY(tlb_miss_load)
-	bra	call_dpf
+	bra	call_handle_tlbmiss
 	 mov	#0, r5
 
 	.align	2
 ENTRY(tlb_miss_store)
-	bra	call_dpf
+	bra	call_handle_tlbmiss
 	 mov	#1, r5
 
 	.align	2
 ENTRY(initial_page_write)
-	bra	call_dpf
-	 mov	#1, r5
+	bra	call_handle_tlbmiss
+	 mov	#2, r5
 
 	.align	2
 ENTRY(tlb_protection_violation_load)
-	bra	call_dpf
+	bra	call_do_page_fault
 	 mov	#0, r5
 
 	.align	2
 ENTRY(tlb_protection_violation_store)
-	bra	call_dpf
+	bra	call_do_page_fault
 	 mov	#1, r5
 
-call_dpf:
+call_handle_tlbmiss:
 	setup_frame_reg
 	mov.l	1f, r0
 	mov	r5, r8
 	mov.l	@r0, r6
-	mov	r6, r9
 	mov.l	2f, r0
 	sts	pr, r10
 	jsr	@r0
@@ -152,16 +151,25 @@
 	 lds	r10, pr
 	rts
 	 nop
-0:	mov.l	3f, r0
-	mov	r9, r6
+0:
 	mov	r8, r5
+call_do_page_fault:
+	mov.l	1f, r0
+	mov.l	@r0, r6
+
+	sti
+
+	mov.l	3f, r0
+	mov.l	4f, r1
+	mov	r15, r4
 	jmp	@r0
-	 mov	r15, r4
+	 lds	r1, pr
 
 	.align 2
 1:	.long	MMU_TEA
-2:	.long	__do_page_fault
+2:	.long	handle_tlbmiss
 3:	.long	do_page_fault
+4:	.long	ret_from_exception
 
 	.align	2
 ENTRY(address_error_load)
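
Note: the r5 value passed to handle_tlbmiss() now distinguishes the initial
page write case, which the old call_dpf path folded into a plain store miss.
In C terms (hypothetical names; the kernel just passes bare integers as the
writeaccess argument):

	enum {
		TLBMISS_LOAD		= 0,	/* tlb_miss_load	*/
		TLBMISS_STORE		= 1,	/* tlb_miss_store	*/
		TLBMISS_INITIAL_WRITE	= 2,	/* initial_page_write	*/
	};

Protection violations no longer go through the miss handler at all; they
branch straight to do_page_fault() via the new call_do_page_fault stub,
which also enables interrupts (sti) before the call.
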
diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index 24de742..1192398 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c
@@ -425,7 +425,6 @@
 		struct task_struct *p, struct pt_regs *regs)
 {
 	struct pt_regs *childregs;
-	unsigned long long se;			/* Sign extension */
 
 #ifdef CONFIG_SH_FPU
 	if(last_task_used_math == current) {
@@ -441,11 +440,19 @@
 
 	*childregs = *regs;
 
+	/*
+	 * Sign extend the edited stack.
+	 * Note that thread.sp and thread.pc will stay
+	 * 32-bit wide and context switch must take care
+	 * of NEFF sign extension.
+	 */
 	if (user_mode(regs)) {
-		childregs->regs[15] = usp;
+		childregs->regs[15] = neff_sign_extend(usp);
 		p->thread.uregs = childregs;
 	} else {
-		childregs->regs[15] = (unsigned long)task_stack_page(p) + THREAD_SIZE;
+		childregs->regs[15] =
+			neff_sign_extend((unsigned long)task_stack_page(p) +
+					 THREAD_SIZE);
 	}
 
 	childregs->regs[9] = 0; /* Set return value for child */
@@ -454,17 +461,6 @@
 	p->thread.sp = (unsigned long) childregs;
 	p->thread.pc = (unsigned long) ret_from_fork;
 
-	/*
-	 * Sign extend the edited stack.
-         * Note that thread.pc and thread.pc will stay
-	 * 32-bit wide and context switch must take care
-	 * of NEFF sign extension.
-	 */
-
-	se = childregs->regs[15];
-	se = (se & NEFF_SIGN) ? (se | NEFF_MASK) : se;
-	childregs->regs[15] = se;
-
 	return 0;
 }
 
diff --git a/arch/sh/kernel/sh_ksyms_32.c b/arch/sh/kernel/sh_ksyms_32.c
index cec6108..8dbe26b 100644
--- a/arch/sh/kernel/sh_ksyms_32.c
+++ b/arch/sh/kernel/sh_ksyms_32.c
@@ -101,11 +101,6 @@
 EXPORT_SYMBOL(flush_dcache_page);
 #endif
 
-#if !defined(CONFIG_CACHE_OFF) && defined(CONFIG_MMU) && \
-	(defined(CONFIG_CPU_SH4) || defined(CONFIG_SH7705_CACHE_32KB))
-EXPORT_SYMBOL(clear_user_page);
-#endif
-
 #ifdef CONFIG_MCOUNT
 DECLARE_EXPORT(mcount);
 #endif
@@ -114,7 +109,6 @@
 #ifdef CONFIG_IPV6
 EXPORT_SYMBOL(csum_ipv6_magic);
 #endif
-EXPORT_SYMBOL(clear_page);
 EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(_ebss);
diff --git a/arch/sh/kernel/sh_ksyms_64.c b/arch/sh/kernel/sh_ksyms_64.c
index f5bd156..f96c95c 100644
--- a/arch/sh/kernel/sh_ksyms_64.c
+++ b/arch/sh/kernel/sh_ksyms_64.c
@@ -52,7 +52,6 @@
 EXPORT_SYMBOL(__get_user_asm_q);
 EXPORT_SYMBOL(__strnlen_user);
 EXPORT_SYMBOL(__strncpy_from_user);
-EXPORT_SYMBOL(clear_page);
 EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(__copy_user);
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index 0663a0e..026fd1c 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -561,13 +561,11 @@
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
 	if (ka->sa.sa_flags & SA_RESTORER) {
-		DEREF_REG_PR = (unsigned long) ka->sa.sa_restorer | 0x1;
-
 		/*
 		 * On SH5 all edited pointers are subject to NEFF
 		 */
-		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
-			(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+		DEREF_REG_PR = neff_sign_extend((unsigned long)
+			ka->sa.sa_restorer | 0x1);
 	} else {
 		/*
 		 * Different approach on SH5.
@@ -580,9 +578,8 @@
 		 * . being code, linker turns ShMedia bit on, always
 		 *   dereference index -1.
 		 */
-		DEREF_REG_PR = (unsigned long) frame->retcode | 0x01;
-		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
-			(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+		DEREF_REG_PR = neff_sign_extend((unsigned long)
+			frame->retcode | 0x01);
 
 		if (__copy_to_user(frame->retcode,
 			(void *)((unsigned long)sa_default_restorer & (~1)), 16) != 0)
@@ -596,9 +593,7 @@
 	 * Set up registers for signal handler.
 	 * All edited pointers are subject to NEFF.
 	 */
-	regs->regs[REG_SP] = (unsigned long) frame;
-	regs->regs[REG_SP] = (regs->regs[REG_SP] & NEFF_SIGN) ?
-		 (regs->regs[REG_SP] | NEFF_MASK) : regs->regs[REG_SP];
+	regs->regs[REG_SP] = neff_sign_extend((unsigned long)frame);
 	regs->regs[REG_ARG1] = signal; /* Arg for signal handler */
 
         /* FIXME:
@@ -613,8 +608,7 @@
 	regs->regs[REG_ARG2] = (unsigned long long)(unsigned long)(signed long)&frame->sc;
 	regs->regs[REG_ARG3] = (unsigned long long)(unsigned long)(signed long)&frame->sc;
 
-	regs->pc = (unsigned long) ka->sa.sa_handler;
-	regs->pc = (regs->pc & NEFF_SIGN) ? (regs->pc | NEFF_MASK) : regs->pc;
+	regs->pc = neff_sign_extend((unsigned long)ka->sa.sa_handler);
 
 	set_fs(USER_DS);
 
@@ -676,13 +670,11 @@
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
 	if (ka->sa.sa_flags & SA_RESTORER) {
-		DEREF_REG_PR = (unsigned long) ka->sa.sa_restorer | 0x1;
-
 		/*
 		 * On SH5 all edited pointers are subject to NEFF
 		 */
-		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
-			(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+		DEREF_REG_PR = neff_sign_extend((unsigned long)
+			ka->sa.sa_restorer | 0x1);
 	} else {
 		/*
 		 * Different approach on SH5.
@@ -695,15 +687,14 @@
 		 * . being code, linker turns ShMedia bit on, always
 		 *   dereference index -1.
 		 */
-
-		DEREF_REG_PR = (unsigned long) frame->retcode | 0x01;
-		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
-			(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+		DEREF_REG_PR = neff_sign_extend((unsigned long)
+			frame->retcode | 0x01);
 
 		if (__copy_to_user(frame->retcode,
 			(void *)((unsigned long)sa_default_rt_restorer & (~1)), 16) != 0)
 			goto give_sigsegv;
 
+		/* Cohere the trampoline with the I-cache. */
 		flush_icache_range(DEREF_REG_PR-1, DEREF_REG_PR-1+15);
 	}
 
@@ -711,14 +702,11 @@
 	 * Set up registers for signal handler.
 	 * All edited pointers are subject to NEFF.
 	 */
-	regs->regs[REG_SP] = (unsigned long) frame;
-	regs->regs[REG_SP] = (regs->regs[REG_SP] & NEFF_SIGN) ?
-		 (regs->regs[REG_SP] | NEFF_MASK) : regs->regs[REG_SP];
+	regs->regs[REG_SP] = neff_sign_extend((unsigned long)frame);
 	regs->regs[REG_ARG1] = signal; /* Arg for signal handler */
 	regs->regs[REG_ARG2] = (unsigned long long)(unsigned long)(signed long)&frame->info;
 	regs->regs[REG_ARG3] = (unsigned long long)(unsigned long)(signed long)&frame->uc.uc_mcontext;
-	regs->pc = (unsigned long) ka->sa.sa_handler;
-	regs->pc = (regs->pc & NEFF_SIGN) ? (regs->pc | NEFF_MASK) : regs->pc;
+	regs->pc = neff_sign_extend((unsigned long)ka->sa.sa_handler);
 
 	set_fs(USER_DS);
 
diff --git a/arch/sh/lib/Makefile b/arch/sh/lib/Makefile
index c2b28d8..a969b47 100644
--- a/arch/sh/lib/Makefile
+++ b/arch/sh/lib/Makefile
@@ -23,7 +23,7 @@
 memcpy-y			:= memcpy.o
 memcpy-$(CONFIG_CPU_SH4)	:= memcpy-sh4.o
 
-lib-$(CONFIG_MMU)		+= copy_page.o clear_page.o
+lib-$(CONFIG_MMU)		+= copy_page.o __clear_user.o
 lib-$(CONFIG_MCOUNT)		+= mcount.o
 lib-y				+= $(memcpy-y) $(udivsi3-y)
 
diff --git a/arch/sh/lib/clear_page.S b/arch/sh/lib/__clear_user.S
similarity index 76%
rename from arch/sh/lib/clear_page.S
rename to arch/sh/lib/__clear_user.S
index 8342bfb..bee9817 100644
--- a/arch/sh/lib/clear_page.S
+++ b/arch/sh/lib/__clear_user.S
@@ -8,52 +8,6 @@
 #include <linux/linkage.h>
 #include <asm/page.h>
 
-/*
- * clear_page
- * @to: P1 address
- *
- * void clear_page(void *to)
- */
-
-/*
- * r0 --- scratch
- * r4 --- to
- * r5 --- to + PAGE_SIZE
- */
-ENTRY(clear_page)
-	mov	r4,r5
-	mov.l	.Llimit,r0
-	add	r0,r5
-	mov	#0,r0
-	!
-1:
-#if defined(CONFIG_CPU_SH4)
-	movca.l	r0,@r4
-	mov	r4,r1
-#else
-	mov.l	r0,@r4
-#endif
-	add	#32,r4
-	mov.l	r0,@-r4
-	mov.l	r0,@-r4
-	mov.l	r0,@-r4
-	mov.l	r0,@-r4
-	mov.l	r0,@-r4
-	mov.l	r0,@-r4
-	mov.l	r0,@-r4
-#if defined(CONFIG_CPU_SH4)
-	ocbwb	@r1
-#endif
-	cmp/eq	r5,r4
-	bf/s	1b
-	 add	#28,r4
-	!
-	rts
-	 nop
-
-	.balign 4
-.Llimit:	.long	(PAGE_SIZE-28)
-
 ENTRY(__clear_user)
 	!
 	mov	#0, r0
diff --git a/arch/sh/lib64/Makefile b/arch/sh/lib64/Makefile
index 334bb2d..1fee75a 100644
--- a/arch/sh/lib64/Makefile
+++ b/arch/sh/lib64/Makefile
@@ -11,7 +11,7 @@
 
 # Panic should really be compiled as PIC
 lib-y  := udelay.o dbg.o panic.o memcpy.o memset.o \
-	  copy_user_memcpy.o copy_page.o clear_page.o strcpy.o strlen.o
+	  copy_user_memcpy.o copy_page.o strcpy.o strlen.o
 
 # Extracted from libgcc
 lib-y	+= udivsi3.o udivdi3.o sdivsi3.o
diff --git a/arch/sh/lib64/clear_page.S b/arch/sh/lib64/clear_page.S
deleted file mode 100644
index 007ab48..0000000
--- a/arch/sh/lib64/clear_page.S
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-   Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
-
-   This file is subject to the terms and conditions of the GNU General Public
-   License.  See the file "COPYING" in the main directory of this archive
-   for more details.
-
-   Tight version of memset for the case of just clearing a page.  It turns out
-   that having the alloco's spaced out slightly due to the increment/branch
-   pair causes them to contend less for access to the cache.  Similarly,
-   keeping the stores apart from the allocos causes less contention.  => Do two
-   separate loops.  Do multiple stores per loop to amortise the
-   increment/branch cost a little.
-
-   Parameters:
-   r2 : source effective address (start of page)
-
-   Always clears 4096 bytes.
-
-   Note : alloco guarded by synco to avoid TAKum03020 erratum
-
-*/
-
-	.section .text..SHmedia32,"ax"
-	.little
-
-	.balign 8
-	.global clear_page
-clear_page:
-	pta/l 1f, tr1
-	pta/l 2f, tr2
-	ptabs/l r18, tr0
-
-	movi 4096, r7
-	add  r2, r7, r7
-	add  r2, r63, r6
-1:
-	alloco r6, 0
-	synco	! TAKum03020
-	addi	r6, 32, r6
-	bgt/l	r7, r6, tr1
-
-	add  r2, r63, r6
-2:
-	st.q  r6,   0, r63
-	st.q  r6,   8, r63
-	st.q  r6,  16, r63
-	st.q  r6,  24, r63
-	addi r6, 32, r6
-	bgt/l r7, r6, tr2
-
-	blink tr0, r63
-
-
diff --git a/arch/sh/mm/Makefile_32 b/arch/sh/mm/Makefile_32
index 986a1e0..17b0252 100644
--- a/arch/sh/mm/Makefile_32
+++ b/arch/sh/mm/Makefile_32
@@ -8,14 +8,14 @@
 cache-$(CONFIG_CPU_SH2)		:= cache-sh2.o
 cache-$(CONFIG_CPU_SH2A)	:= cache-sh2a.o
 cache-$(CONFIG_CPU_SH3)		:= cache-sh3.o
-cache-$(CONFIG_CPU_SH4)		:= cache-sh4.o
+cache-$(CONFIG_CPU_SH4)		:= cache-sh4.o flush-sh4.o
 cache-$(CONFIG_SH7705_CACHE_32KB)	+= cache-sh7705.o
 endif
 
 obj-y			+= $(cache-y)
 
 mmu-y			:= tlb-nommu.o pg-nommu.o
-mmu-$(CONFIG_MMU)	:= fault_32.o tlbflush_32.o ioremap_32.o
+mmu-$(CONFIG_MMU)	:= fault_32.o tlbflush_32.o ioremap_32.o pg-mmu.o
 
 obj-y			+= $(mmu-y)
 obj-$(CONFIG_DEBUG_FS)	+= asids-debugfs.o
@@ -29,10 +29,6 @@
 tlb-$(CONFIG_CPU_SH4)		:= tlb-sh4.o
 tlb-$(CONFIG_CPU_HAS_PTEAEX)	:= tlb-pteaex.o
 obj-y				+= $(tlb-y)
-ifndef CONFIG_CACHE_OFF
-obj-$(CONFIG_CPU_SH4)		+= pg-sh4.o
-obj-$(CONFIG_SH7705_CACHE_32KB)	+= pg-sh7705.o
-endif
 endif
 
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
diff --git a/arch/sh/mm/Makefile_64 b/arch/sh/mm/Makefile_64
index 2863ffb..66c3910 100644
--- a/arch/sh/mm/Makefile_64
+++ b/arch/sh/mm/Makefile_64
@@ -9,7 +9,7 @@
 			   extable_64.o
 
 ifndef CONFIG_CACHE_OFF
-obj-y			+= cache-sh5.o
+obj-y			+= cache-sh5.o flush-sh4.o
 endif
 
 obj-y			+= $(mmu-y)
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index 5cfe08d..92f87a4 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -14,6 +14,7 @@
 #include <linux/mm.h>
 #include <linux/io.h>
 #include <linux/mutex.h>
+#include <linux/fs.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
@@ -118,66 +119,6 @@
 }
 
 /*
- * Write back the dirty D-caches, but not invalidate them.
- *
- * START: Virtual Address (U0, P1, or P3)
- * SIZE: Size of the region.
- */
-void __flush_wback_region(void *start, int size)
-{
-	unsigned long v;
-	unsigned long begin, end;
-
-	begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
-	end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
-		& ~(L1_CACHE_BYTES-1);
-	for (v = begin; v < end; v+=L1_CACHE_BYTES) {
-		asm volatile("ocbwb	%0"
-			     : /* no output */
-			     : "m" (__m(v)));
-	}
-}
-
-/*
- * Write back the dirty D-caches and invalidate them.
- *
- * START: Virtual Address (U0, P1, or P3)
- * SIZE: Size of the region.
- */
-void __flush_purge_region(void *start, int size)
-{
-	unsigned long v;
-	unsigned long begin, end;
-
-	begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
-	end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
-		& ~(L1_CACHE_BYTES-1);
-	for (v = begin; v < end; v+=L1_CACHE_BYTES) {
-		asm volatile("ocbp	%0"
-			     : /* no output */
-			     : "m" (__m(v)));
-	}
-}
-
-/*
- * No write back please
- */
-void __flush_invalidate_region(void *start, int size)
-{
-	unsigned long v;
-	unsigned long begin, end;
-
-	begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
-	end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
-		& ~(L1_CACHE_BYTES-1);
-	for (v = begin; v < end; v+=L1_CACHE_BYTES) {
-		asm volatile("ocbi	%0"
-			     : /* no output */
-			     : "m" (__m(v)));
-	}
-}
-
-/*
  * Write back the range of D-cache, and purge the I-cache.
  *
  * Called from kernel/module.c:sys_init_module and routine for a.out format,
@@ -246,7 +187,14 @@
  */
 void flush_dcache_page(struct page *page)
 {
-	if (test_bit(PG_mapped, &page->flags)) {
+	struct address_space *mapping = page_mapping(page);
+
+#ifndef CONFIG_SMP
+	if (mapping && !mapping_mapped(mapping))
+		set_bit(PG_dcache_dirty, &page->flags);
+	else
+#endif
+	{
 		unsigned long phys = PHYSADDR(page_address(page));
 		unsigned long addr = CACHE_OC_ADDRESS_ARRAY;
 		int i, n;
@@ -382,6 +330,9 @@
  */
 void flush_cache_mm(struct mm_struct *mm)
 {
+	if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT)
+		return;
+
 	/*
 	 * If cache is only 4k-per-way, there are never any 'aliases'.  Since
 	 * the cache is physically tagged, the data can just be left in there.
@@ -423,6 +374,9 @@
 	unsigned long phys = pfn << PAGE_SHIFT;
 	unsigned int alias_mask;
 
+	if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT)
+		return;
+
 	alias_mask = boot_cpu_data.dcache.alias_mask;
 
 	/* We only need to flush D-cache when we have alias */
@@ -465,6 +419,9 @@
 void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 		       unsigned long end)
 {
+	if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT)
+		return;
+
 	/*
 	 * If cache is only 4k-per-way, there are never any 'aliases'.  Since
 	 * the cache is physically tagged, the data can just be left in there.
diff --git a/arch/sh/mm/cache-sh5.c b/arch/sh/mm/cache-sh5.c
index 8676209..698113f 100644
--- a/arch/sh/mm/cache-sh5.c
+++ b/arch/sh/mm/cache-sh5.c
@@ -539,54 +539,6 @@
 		sh64_dcache_purge_user_pages(mm, start, end);
 	}
 }
-
-/*
- * Purge the range of addresses from the D-cache.
- *
- * The addresses lie in the superpage mapping. There's no harm if we
- * overpurge at either end - just a small performance loss.
- */
-void __flush_purge_region(void *start, int size)
-{
-	unsigned long long ullend, addr, aligned_start;
-
-	aligned_start = (unsigned long long)(signed long long)(signed long) start;
-	addr = L1_CACHE_ALIGN(aligned_start);
-	ullend = (unsigned long long) (signed long long) (signed long) start + size;
-
-	while (addr <= ullend) {
-		__asm__ __volatile__ ("ocbp %0, 0" : : "r" (addr));
-		addr += L1_CACHE_BYTES;
-	}
-}
-
-void __flush_wback_region(void *start, int size)
-{
-	unsigned long long ullend, addr, aligned_start;
-
-	aligned_start = (unsigned long long)(signed long long)(signed long) start;
-	addr = L1_CACHE_ALIGN(aligned_start);
-	ullend = (unsigned long long) (signed long long) (signed long) start + size;
-
-	while (addr < ullend) {
-		__asm__ __volatile__ ("ocbwb %0, 0" : : "r" (addr));
-		addr += L1_CACHE_BYTES;
-	}
-}
-
-void __flush_invalidate_region(void *start, int size)
-{
-	unsigned long long ullend, addr, aligned_start;
-
-	aligned_start = (unsigned long long)(signed long long)(signed long) start;
-	addr = L1_CACHE_ALIGN(aligned_start);
-	ullend = (unsigned long long) (signed long long) (signed long) start + size;
-
-	while (addr < ullend) {
-		__asm__ __volatile__ ("ocbi %0, 0" : : "r" (addr));
-		addr += L1_CACHE_BYTES;
-	}
-}
 #endif /* !CONFIG_DCACHE_DISABLED */
 
 /*
@@ -831,4 +783,21 @@
 	else
 		sh64_clear_user_page_coloured(to, address);
 }
+
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+		       unsigned long vaddr, void *dst, const void *src,
+		       unsigned long len)
+{
+	flush_cache_page(vma, vaddr, page_to_pfn(page));
+	memcpy(dst, src, len);
+	flush_icache_user_range(vma, page, vaddr, len);
+}
+
+void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
+			 unsigned long vaddr, void *dst, const void *src,
+			 unsigned long len)
+{
+	flush_cache_page(vma, vaddr, page_to_pfn(page));
+	memcpy(dst, src, len);
+}
 #endif
diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c
index 22dacc7..fa37bff 100644
--- a/arch/sh/mm/cache-sh7705.c
+++ b/arch/sh/mm/cache-sh7705.c
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
+#include <linux/fs.h>
 #include <linux/threads.h>
 #include <asm/addrspace.h>
 #include <asm/page.h>
@@ -128,7 +129,11 @@
  */
 void flush_dcache_page(struct page *page)
 {
-	if (test_bit(PG_mapped, &page->flags))
+	struct address_space *mapping = page_mapping(page);
+
+	if (mapping && !mapping_mapped(mapping))
+		set_bit(PG_dcache_dirty, &page->flags);
+	else
 		__flush_dcache_page(PHYSADDR(page_address(page)));
 }
 
diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
index dbbdeba..f1c93c8 100644
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -318,16 +318,15 @@
 /*
  * Called with interrupts disabled.
  */
-asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
-					 unsigned long writeaccess,
-					 unsigned long address)
+asmlinkage int __kprobes
+handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess,
+	       unsigned long address)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	pte_t entry;
-	int ret = 1;
 
 	/*
 	 * We don't take page faults for P1, P2, and parts of P4, these
@@ -338,40 +337,41 @@
 		pgd = pgd_offset_k(address);
 	} else {
 		if (unlikely(address >= TASK_SIZE || !current->mm))
-			goto out;
+			return 1;
 
 		pgd = pgd_offset(current->mm, address);
 	}
 
 	pud = pud_offset(pgd, address);
 	if (pud_none_or_clear_bad(pud))
-		goto out;
+		return 1;
 	pmd = pmd_offset(pud, address);
 	if (pmd_none_or_clear_bad(pmd))
-		goto out;
+		return 1;
 	pte = pte_offset_kernel(pmd, address);
 	entry = *pte;
 	if (unlikely(pte_none(entry) || pte_not_present(entry)))
-		goto out;
+		return 1;
 	if (unlikely(writeaccess && !pte_write(entry)))
-		goto out;
+		return 1;
 
 	if (writeaccess)
 		entry = pte_mkdirty(entry);
 	entry = pte_mkyoung(entry);
 
+	set_pte(pte, entry);
+
 #if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP)
 	/*
-	 * ITLB is not affected by "ldtlb" instruction.
-	 * So, we need to flush the entry by ourselves.
+	 * SH-4 does not set MMUCR.RC to the corresponding TLB entry in
+	 * the case of an initial page write exception, so we need to
+	 * flush it in order to avoid potential TLB entry duplication.
 	 */
-	local_flush_tlb_one(get_asid(), address & PAGE_MASK);
+	if (writeaccess == 2)
+		local_flush_tlb_one(get_asid(), address & PAGE_MASK);
 #endif
 
-	set_pte(pte, entry);
 	update_mmu_cache(NULL, address, entry);
 
-	ret = 0;
-out:
-	return ret;
+	return 0;
 }
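
Note: the int return value is the contract with entry.S: 0 means the TLB
was refilled and the exception is fully handled, non-zero means fall
through to the real fault handler. A C rendering of the assembly control
flow, purely for reference:

	/* what call_handle_tlbmiss/call_do_page_fault do, in C */
	if (handle_tlbmiss(regs, writeaccess, address))
		do_page_fault(regs, writeaccess, address);
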
diff --git a/arch/sh/mm/fault_64.c b/arch/sh/mm/fault_64.c
index bd63b96..2b356ce 100644
--- a/arch/sh/mm/fault_64.c
+++ b/arch/sh/mm/fault_64.c
@@ -56,16 +56,7 @@
 	/*
 	 * Set PTEH register
 	 */
-	pteh = address & MMU_VPN_MASK;
-
-	/* Sign extend based on neff. */
-#if (NEFF == 32)
-	/* Faster sign extension */
-	pteh = (unsigned long long)(signed long long)(signed long)pteh;
-#else
-	/* General case */
-	pteh = (pteh & NEFF_SIGN) ? (pteh | NEFF_MASK) : pteh;
-#endif
+	pteh = neff_sign_extend(address & MMU_VPN_MASK);
 
 	/* Set the ASID. */
 	pteh |= get_asid() << PTEH_ASID_SHIFT;
diff --git a/arch/sh/mm/flush-sh4.c b/arch/sh/mm/flush-sh4.c
new file mode 100644
index 0000000..1b6b6a1
--- /dev/null
+++ b/arch/sh/mm/flush-sh4.c
@@ -0,0 +1,128 @@
+#include <linux/mm.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+/*
+ * Write back the dirty D-caches, but do not invalidate them.
+ *
+ * START: Virtual Address (U0, P1, or P3)
+ * SIZE: Size of the region.
+ */
+void __weak __flush_wback_region(void *start, int size)
+{
+	reg_size_t aligned_start, v, cnt, end;
+
+	aligned_start = register_align(start);
+	v = aligned_start & ~(L1_CACHE_BYTES-1);
+	end = (aligned_start + size + L1_CACHE_BYTES-1)
+		& ~(L1_CACHE_BYTES-1);
+	cnt = (end - v) / L1_CACHE_BYTES;
+
+	while (cnt >= 8) {
+		asm volatile("ocbwb	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbwb	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbwb	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbwb	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbwb	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbwb	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbwb	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbwb	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		cnt -= 8;
+	}
+
+	while (cnt) {
+		asm volatile("ocbwb	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		cnt--;
+	}
+}
+
+/*
+ * Write back the dirty D-caches and invalidate them.
+ *
+ * START: Virtual Address (U0, P1, or P3)
+ * SIZE: Size of the region.
+ */
+void __weak __flush_purge_region(void *start, int size)
+{
+	reg_size_t aligned_start, v, cnt, end;
+
+	aligned_start = register_align(start);
+	v = aligned_start & ~(L1_CACHE_BYTES-1);
+	end = (aligned_start + size + L1_CACHE_BYTES-1)
+		& ~(L1_CACHE_BYTES-1);
+	cnt = (end - v) / L1_CACHE_BYTES;
+
+	while (cnt >= 8) {
+		asm volatile("ocbp	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbp	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbp	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbp	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbp	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbp	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbp	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbp	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		cnt -= 8;
+	}
+	while (cnt) {
+		asm volatile("ocbp	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		cnt--;
+	}
+}
+
+/*
+ * No write back please
+ */
+void __weak __flush_invalidate_region(void *start, int size)
+{
+	reg_size_t aligned_start, v, cnt, end;
+
+	aligned_start = register_align(start);
+	v = aligned_start & ~(L1_CACHE_BYTES-1);
+	end = (aligned_start + size + L1_CACHE_BYTES-1)
+		& ~(L1_CACHE_BYTES-1);
+	cnt = (end - v) / L1_CACHE_BYTES;
+
+	while (cnt >= 8) {
+		asm volatile("ocbi	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbi	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbi	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbi	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbi	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbi	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbi	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		asm volatile("ocbi	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		cnt -= 8;
+	}
+
+	while (cnt) {
+		asm volatile("ocbi	@%0" : : "r" (v));
+		v += L1_CACHE_BYTES;
+		cnt--;
+	}
+}
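
Note: these generic region ops are unrolled eight cache lines per iteration
to amortise the loop overhead, and declared __weak so a CPU family with
special requirements can still provide its own copies (the SH-5 private
versions in cache-sh5.c are dropped above in favour of these). Any caller
simply passes a virtual range, as a hedged example:

	/* e.g. write a freshly built buffer back to memory before a
	 * device reads it (illustrative; real drivers use the DMA API) */
	__flush_wback_region(buf, len);
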
diff --git a/arch/sh/mm/pg-mmu.c b/arch/sh/mm/pg-mmu.c
new file mode 100644
index 0000000..027c4d8
--- /dev/null
+++ b/arch/sh/mm/pg-mmu.c
@@ -0,0 +1,176 @@
+/*
+ * arch/sh/mm/pg-mmu.c
+ *
+ * Copyright (C) 1999, 2000, 2002  Niibe Yutaka
+ * Copyright (C) 2002 - 2009  Paul Mundt
+ *
+ * Released under the terms of the GNU GPL v2.0.
+ */
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/fs.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+#define kmap_get_fixmap_pte(vaddr)                                     \
+	pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr))
+
+static pte_t *kmap_coherent_pte;
+
+void __init kmap_coherent_init(void)
+{
+#if defined(CONFIG_CPU_SH4) || defined(CONFIG_SH7705_CACHE_32KB)
+	unsigned long vaddr;
+
+	/* cache the first coherent kmap pte */
+	vaddr = __fix_to_virt(FIX_CMAP_BEGIN);
+	kmap_coherent_pte = kmap_get_fixmap_pte(vaddr);
+#endif
+}
+
+static void *kmap_coherent(struct page *page, unsigned long addr)
+{
+	enum fixed_addresses idx;
+	unsigned long vaddr, flags;
+	pte_t pte;
+
+	BUG_ON(test_bit(PG_dcache_dirty, &page->flags));
+
+	inc_preempt_count();
+
+	idx = (addr & current_cpu_data.dcache.alias_mask) >> PAGE_SHIFT;
+	vaddr = __fix_to_virt(FIX_CMAP_END - idx);
+	pte = mk_pte(page, PAGE_KERNEL);
+
+	local_irq_save(flags);
+	flush_tlb_one(get_asid(), vaddr);
+	local_irq_restore(flags);
+
+	update_mmu_cache(NULL, vaddr, pte);
+
+	set_pte(kmap_coherent_pte - (FIX_CMAP_END - idx), pte);
+
+	return (void *)vaddr;
+}
+
+static inline void kunmap_coherent(void)
+{
+	dec_preempt_count();
+	preempt_check_resched();
+}
+
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+		       unsigned long vaddr, void *dst, const void *src,
+		       unsigned long len)
+{
+	if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
+	    !test_bit(PG_dcache_dirty, &page->flags)) {
+		void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
+		memcpy(vto, src, len);
+		kunmap_coherent();
+	} else {
+		memcpy(dst, src, len);
+		if (boot_cpu_data.dcache.n_aliases)
+			set_bit(PG_dcache_dirty, &page->flags);
+	}
+
+	if (vma->vm_flags & VM_EXEC)
+		flush_cache_page(vma, vaddr, page_to_pfn(page));
+}
+
+void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
+			 unsigned long vaddr, void *dst, const void *src,
+			 unsigned long len)
+{
+	if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
+	    !test_bit(PG_dcache_dirty, &page->flags)) {
+		void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
+		memcpy(dst, vfrom, len);
+		kunmap_coherent();
+	} else {
+		memcpy(dst, src, len);
+		if (boot_cpu_data.dcache.n_aliases)
+			set_bit(PG_dcache_dirty, &page->flags);
+	}
+}
+
+void copy_user_highpage(struct page *to, struct page *from,
+			unsigned long vaddr, struct vm_area_struct *vma)
+{
+	void *vfrom, *vto;
+
+	vto = kmap_atomic(to, KM_USER1);
+
+	if (boot_cpu_data.dcache.n_aliases && page_mapped(from) &&
+	    !test_bit(PG_dcache_dirty, &from->flags)) {
+		vfrom = kmap_coherent(from, vaddr);
+		copy_page(vto, vfrom);
+		kunmap_coherent();
+	} else {
+		vfrom = kmap_atomic(from, KM_USER0);
+		copy_page(vto, vfrom);
+		kunmap_atomic(vfrom, KM_USER0);
+	}
+
+	if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK))
+		__flush_wback_region(vto, PAGE_SIZE);
+
+	kunmap_atomic(vto, KM_USER1);
+	/* Make sure this page is up to date on other CPUs too before using it */
+	smp_wmb();
+}
+EXPORT_SYMBOL(copy_user_highpage);
+
+void clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+	void *kaddr = kmap_atomic(page, KM_USER0);
+
+	clear_page(kaddr);
+
+	if (pages_do_alias((unsigned long)kaddr, vaddr & PAGE_MASK))
+		__flush_wback_region(kaddr, PAGE_SIZE);
+
+	kunmap_atomic(kaddr, KM_USER0);
+}
+EXPORT_SYMBOL(clear_user_highpage);
+
+void __update_cache(struct vm_area_struct *vma,
+		    unsigned long address, pte_t pte)
+{
+	struct page *page;
+	unsigned long pfn = pte_pfn(pte);
+
+	if (!boot_cpu_data.dcache.n_aliases)
+		return;
+
+	page = pfn_to_page(pfn);
+	if (pfn_valid(pfn) && page_mapping(page)) {
+		int dirty = test_and_clear_bit(PG_dcache_dirty, &page->flags);
+		if (dirty) {
+			unsigned long addr = (unsigned long)page_address(page);
+
+			if (pages_do_alias(addr, address & PAGE_MASK))
+				__flush_wback_region((void *)addr, PAGE_SIZE);
+		}
+	}
+}
+
+void __flush_anon_page(struct page *page, unsigned long vmaddr)
+{
+	unsigned long addr = (unsigned long) page_address(page);
+
+	if (pages_do_alias(addr, vmaddr)) {
+		if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
+		    !test_bit(PG_dcache_dirty, &page->flags)) {
+			void *kaddr;
+
+			kaddr = kmap_coherent(page, vmaddr);
+			__flush_wback_region((void *)kaddr, PAGE_SIZE);
+			kunmap_coherent();
+		} else
+			__flush_wback_region((void *)addr, PAGE_SIZE);
+	}
+}
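
Note: pg-mmu.c consolidates pg-sh4.c and pg-sh7705.c (deleted below) around
the PG_dcache_dirty protocol. Condensed from this file plus the
flush_dcache_page() changes in cache-sh4.c/cache-sh7705.c above, the
lifecycle is:

	/* sketch of the lazy D-cache writeback protocol */
	flush_dcache_page(page);	/* unmapped pagecache page: just set
					   PG_dcache_dirty and defer      */
	__update_cache(vma, addr, pte);	/* fault-in: if the page was dirty
					   and kernel/user addresses alias,
					   write back the kernel mapping  */
	kmap_coherent(page, addr);	/* map at the user colour; only
					   legal on clean pages (BUG_ON)  */
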
diff --git a/arch/sh/mm/pg-nommu.c b/arch/sh/mm/pg-nommu.c
index 91ed4e6..7e33b48 100644
--- a/arch/sh/mm/pg-nommu.c
+++ b/arch/sh/mm/pg-nommu.c
@@ -1,7 +1,7 @@
 /*
  * arch/sh/mm/pg-nommu.c
  *
- * clear_page()/copy_page() implementation for MMUless SH.
+ * copy_page()/__copy_user()/__clear_user() implementations for MMUless SH.
  *
  * Copyright (C) 2003  Paul Mundt
  *
@@ -20,11 +20,6 @@
 	memcpy(to, from, PAGE_SIZE);
 }
 
-void clear_page(void *to)
-{
-	memset(to, 0, PAGE_SIZE);
-}
-
 __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n)
 {
 	memcpy(to, from, n);
diff --git a/arch/sh/mm/pg-sh4.c b/arch/sh/mm/pg-sh4.c
deleted file mode 100644
index 2fe14da..0000000
--- a/arch/sh/mm/pg-sh4.c
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * arch/sh/mm/pg-sh4.c
- *
- * Copyright (C) 1999, 2000, 2002  Niibe Yutaka
- * Copyright (C) 2002 - 2007  Paul Mundt
- *
- * Released under the terms of the GNU GPL v2.0.
- */
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/mutex.h>
-#include <linux/fs.h>
-#include <linux/highmem.h>
-#include <linux/module.h>
-#include <asm/mmu_context.h>
-#include <asm/cacheflush.h>
-
-#define CACHE_ALIAS (current_cpu_data.dcache.alias_mask)
-
-#define kmap_get_fixmap_pte(vaddr)                                     \
-	pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr))
-
-static pte_t *kmap_coherent_pte;
-
-void __init kmap_coherent_init(void)
-{
-	unsigned long vaddr;
-
-	/* cache the first coherent kmap pte */
-	vaddr = __fix_to_virt(FIX_CMAP_BEGIN);
-	kmap_coherent_pte = kmap_get_fixmap_pte(vaddr);
-}
-
-static inline void *kmap_coherent(struct page *page, unsigned long addr)
-{
-	enum fixed_addresses idx;
-	unsigned long vaddr, flags;
-	pte_t pte;
-
-	inc_preempt_count();
-
-	idx = (addr & current_cpu_data.dcache.alias_mask) >> PAGE_SHIFT;
-	vaddr = __fix_to_virt(FIX_CMAP_END - idx);
-	pte = mk_pte(page, PAGE_KERNEL);
-
-	local_irq_save(flags);
-	flush_tlb_one(get_asid(), vaddr);
-	local_irq_restore(flags);
-
-	update_mmu_cache(NULL, vaddr, pte);
-
-	set_pte(kmap_coherent_pte - (FIX_CMAP_END - idx), pte);
-
-	return (void *)vaddr;
-}
-
-static inline void kunmap_coherent(struct page *page)
-{
-	dec_preempt_count();
-	preempt_check_resched();
-}
-
-/*
- * clear_user_page
- * @to: P1 address
- * @address: U0 address to be mapped
- * @page: page (virt_to_page(to))
- */
-void clear_user_page(void *to, unsigned long address, struct page *page)
-{
-	__set_bit(PG_mapped, &page->flags);
-
-	clear_page(to);
-	if ((((address & PAGE_MASK) ^ (unsigned long)to) & CACHE_ALIAS))
-		__flush_wback_region(to, PAGE_SIZE);
-}
-
-void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
-		       unsigned long vaddr, void *dst, const void *src,
-		       unsigned long len)
-{
-	void *vto;
-
-	__set_bit(PG_mapped, &page->flags);
-
-	vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
-	memcpy(vto, src, len);
-	kunmap_coherent(vto);
-
-	if (vma->vm_flags & VM_EXEC)
-		flush_cache_page(vma, vaddr, page_to_pfn(page));
-}
-
-void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
-			 unsigned long vaddr, void *dst, const void *src,
-			 unsigned long len)
-{
-	void *vfrom;
-
-	__set_bit(PG_mapped, &page->flags);
-
-	vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
-	memcpy(dst, vfrom, len);
-	kunmap_coherent(vfrom);
-}
-
-void copy_user_highpage(struct page *to, struct page *from,
-			unsigned long vaddr, struct vm_area_struct *vma)
-{
-	void *vfrom, *vto;
-
-	__set_bit(PG_mapped, &to->flags);
-
-	vto = kmap_atomic(to, KM_USER1);
-	vfrom = kmap_coherent(from, vaddr);
-	copy_page(vto, vfrom);
-	kunmap_coherent(vfrom);
-
-	if (((vaddr ^ (unsigned long)vto) & CACHE_ALIAS))
-		__flush_wback_region(vto, PAGE_SIZE);
-
-	kunmap_atomic(vto, KM_USER1);
-	/* Make sure this page is cleared on other CPU's too before using it */
-	smp_wmb();
-}
-EXPORT_SYMBOL(copy_user_highpage);
-
-/*
- * For SH-4, we have our own implementation for ptep_get_and_clear
- */
-pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
-	pte_t pte = *ptep;
-
-	pte_clear(mm, addr, ptep);
-	if (!pte_not_present(pte)) {
-		unsigned long pfn = pte_pfn(pte);
-		if (pfn_valid(pfn)) {
-			struct page *page = pfn_to_page(pfn);
-			struct address_space *mapping = page_mapping(page);
-			if (!mapping || !mapping_writably_mapped(mapping))
-				__clear_bit(PG_mapped, &page->flags);
-		}
-	}
-	return pte;
-}
diff --git a/arch/sh/mm/pg-sh7705.c b/arch/sh/mm/pg-sh7705.c
deleted file mode 100644
index eaf2514..0000000
--- a/arch/sh/mm/pg-sh7705.c
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * arch/sh/mm/pg-sh7705.c
- *
- * Copyright (C) 1999, 2000  Niibe Yutaka
- * Copyright (C) 2004  Alex Song
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- */
-
-#include <linux/init.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/threads.h>
-#include <linux/fs.h>
-#include <asm/addrspace.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/cache.h>
-#include <asm/io.h>
-#include <asm/uaccess.h>
-#include <asm/pgalloc.h>
-#include <asm/mmu_context.h>
-#include <asm/cacheflush.h>
-
-static inline void __flush_purge_virtual_region(void *p1, void *virt, int size)
-{
-	unsigned long v;
-	unsigned long begin, end;
-	unsigned long p1_begin;
-
-
-	begin = L1_CACHE_ALIGN((unsigned long)virt);
-	end = L1_CACHE_ALIGN((unsigned long)virt + size);
-
-	p1_begin = (unsigned long)p1 & ~(L1_CACHE_BYTES - 1);
-
-	/* do this the slow way as we may not have TLB entries
-	 * for virt yet. */
-	for (v = begin; v < end; v += L1_CACHE_BYTES) {
-		unsigned long p;
-	        unsigned long ways, addr;
-
-		p = __pa(p1_begin);
-
-	        ways = current_cpu_data.dcache.ways;
-		addr = CACHE_OC_ADDRESS_ARRAY;
-
-		do {
-			unsigned long data;
-
-			addr |= (v & current_cpu_data.dcache.entry_mask);
-
-			data = ctrl_inl(addr);
-			if ((data & CACHE_PHYSADDR_MASK) ==
-			       (p & CACHE_PHYSADDR_MASK)) {
-				data &= ~(SH_CACHE_UPDATED|SH_CACHE_VALID);
-				ctrl_outl(data, addr);
-			}
-
-			addr += current_cpu_data.dcache.way_incr;
-		} while (--ways);
-
-		p1_begin += L1_CACHE_BYTES;
-	}
-}
-
-/*
- * clear_user_page
- * @to: P1 address
- * @address: U0 address to be mapped
- */
-void clear_user_page(void *to, unsigned long address, struct page *pg)
-{
-	struct page *page = virt_to_page(to);
-
-	__set_bit(PG_mapped, &page->flags);
-	if (((address ^ (unsigned long)to) & CACHE_ALIAS) == 0) {
-		clear_page(to);
-		__flush_wback_region(to, PAGE_SIZE);
-	} else {
-		__flush_purge_virtual_region(to,
-					     (void *)(address & 0xfffff000),
-					     PAGE_SIZE);
-		clear_page(to);
-		__flush_wback_region(to, PAGE_SIZE);
-	}
-}
-
-/*
- * copy_user_page
- * @to: P1 address
- * @from: P1 address
- * @address: U0 address to be mapped
- */
-void copy_user_page(void *to, void *from, unsigned long address, struct page *pg)
-{
-	struct page *page = virt_to_page(to);
-
-
-	__set_bit(PG_mapped, &page->flags);
-	if (((address ^ (unsigned long)to) & CACHE_ALIAS) == 0) {
-		copy_page(to, from);
-		__flush_wback_region(to, PAGE_SIZE);
-	} else {
-		__flush_purge_virtual_region(to,
-					     (void *)(address & 0xfffff000),
-					     PAGE_SIZE);
-		copy_page(to, from);
-		__flush_wback_region(to, PAGE_SIZE);
-	}
-}
-
-/*
- * For SH7705, we have our own implementation for ptep_get_and_clear
- * Copied from pg-sh4.c
- */
-pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
-	pte_t pte = *ptep;
-
-	pte_clear(mm, addr, ptep);
-	if (!pte_not_present(pte)) {
-		unsigned long pfn = pte_pfn(pte);
-		if (pfn_valid(pfn)) {
-			struct page *page = pfn_to_page(pfn);
-			struct address_space *mapping = page_mapping(page);
-			if (!mapping || !mapping_writably_mapped(mapping))
-				__clear_bit(PG_mapped, &page->flags);
-		}
-	}
-
-	return pte;
-}
-
diff --git a/arch/sh/mm/tlb-nommu.c b/arch/sh/mm/tlb-nommu.c
index 71c742b..0ef5429 100644
--- a/arch/sh/mm/tlb-nommu.c
+++ b/arch/sh/mm/tlb-nommu.c
@@ -46,10 +46,13 @@
 	BUG();
 }
 
-void update_mmu_cache(struct vm_area_struct * vma,
-		      unsigned long address, pte_t pte)
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
 {
-	BUG();
+}
+
+void __update_cache(struct vm_area_struct *vma,
+		    unsigned long address, pte_t pte)
+{
 }
 
 void __init page_table_range_init(unsigned long start, unsigned long end,
diff --git a/arch/sh/mm/tlb-pteaex.c b/arch/sh/mm/tlb-pteaex.c
index 2aab3ea..409b7c2 100644
--- a/arch/sh/mm/tlb-pteaex.c
+++ b/arch/sh/mm/tlb-pteaex.c
@@ -16,34 +16,16 @@
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
-void update_mmu_cache(struct vm_area_struct * vma,
-		      unsigned long address, pte_t pte)
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
 {
-	unsigned long flags;
-	unsigned long pteval;
-	unsigned long vpn;
+	unsigned long flags, pteval, vpn;
 
-	/* Ptrace may call this routine. */
+	/*
+	 * Handle debugger faulting in for the debuggee.
+	 */
 	if (vma && current->active_mm != vma->vm_mm)
 		return;
 
-#ifndef CONFIG_CACHE_OFF
-	{
-		unsigned long pfn = pte_pfn(pte);
-
-		if (pfn_valid(pfn)) {
-			struct page *page = pfn_to_page(pfn);
-
-			if (!test_bit(PG_mapped, &page->flags)) {
-				unsigned long phys = pte_val(pte) & PTE_PHYS_MASK;
-				__flush_wback_region((void *)P1SEGADDR(phys),
-						     PAGE_SIZE);
-				__set_bit(PG_mapped, &page->flags);
-			}
-		}
-	}
-#endif
-
 	local_irq_save(flags);
 
 	/* Set PTEH register */
diff --git a/arch/sh/mm/tlb-sh3.c b/arch/sh/mm/tlb-sh3.c
index 17cb7c3..ace8e6d 100644
--- a/arch/sh/mm/tlb-sh3.c
+++ b/arch/sh/mm/tlb-sh3.c
@@ -27,32 +27,16 @@
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
-void update_mmu_cache(struct vm_area_struct * vma,
-		      unsigned long address, pte_t pte)
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
 {
-	unsigned long flags;
-	unsigned long pteval;
-	unsigned long vpn;
+	unsigned long flags, pteval, vpn;
 
-	/* Ptrace may call this routine. */
+	/*
+	 * Handle debugger faulting in for the debuggee.
+	 */
 	if (vma && current->active_mm != vma->vm_mm)
 		return;
 
-#if defined(CONFIG_SH7705_CACHE_32KB)
-	{
-		struct page *page = pte_page(pte);
-		unsigned long pfn = pte_pfn(pte);
-
-		if (pfn_valid(pfn) && !test_bit(PG_mapped, &page->flags)) {
-			unsigned long phys = pte_val(pte) & PTE_PHYS_MASK;
-
-			__flush_wback_region((void *)P1SEGADDR(phys),
-					     PAGE_SIZE);
-			__set_bit(PG_mapped, &page->flags);
-		}
-	}
-#endif
-
 	local_irq_save(flags);
 
 	/* Set PTEH register */
@@ -93,4 +77,3 @@
 	for (i = 0; i < ways; i++)
 		ctrl_outl(data, addr + (i << 8));
 }
-
diff --git a/arch/sh/mm/tlb-sh4.c b/arch/sh/mm/tlb-sh4.c
index f0c7b73..7d3c63e 100644
--- a/arch/sh/mm/tlb-sh4.c
+++ b/arch/sh/mm/tlb-sh4.c
@@ -15,34 +15,16 @@
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
-void update_mmu_cache(struct vm_area_struct * vma,
-		      unsigned long address, pte_t pte)
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
 {
-	unsigned long flags;
-	unsigned long pteval;
-	unsigned long vpn;
+	unsigned long flags, pteval, vpn;
 
-	/* Ptrace may call this routine. */
+	/*
+	 * Handle debugger faulting in for the debuggee.
+	 */
 	if (vma && current->active_mm != vma->vm_mm)
 		return;
 
-#ifndef CONFIG_CACHE_OFF
-	{
-		unsigned long pfn = pte_pfn(pte);
-
-		if (pfn_valid(pfn)) {
-			struct page *page = pfn_to_page(pfn);
-
-			if (!test_bit(PG_mapped, &page->flags)) {
-				unsigned long phys = pte_val(pte) & PTE_PHYS_MASK;
-				__flush_wback_region((void *)P1SEGADDR(phys),
-						     PAGE_SIZE);
-				__set_bit(PG_mapped, &page->flags);
-			}
-		}
-	}
-#endif
-
 	local_irq_save(flags);
 
 	/* Set PTEH register */
diff --git a/arch/sh/mm/tlb-sh5.c b/arch/sh/mm/tlb-sh5.c
index dae1312..fdb64e4 100644
--- a/arch/sh/mm/tlb-sh5.c
+++ b/arch/sh/mm/tlb-sh5.c
@@ -117,26 +117,15 @@
  * Load up a virtual<->physical translation for @eaddr<->@paddr in the
  * pre-allocated TLB slot @config_addr (see sh64_get_wired_dtlb_entry).
  */
-inline void sh64_setup_tlb_slot(unsigned long long config_addr,
-				unsigned long eaddr,
-				unsigned long asid,
-				unsigned long paddr)
+void sh64_setup_tlb_slot(unsigned long long config_addr, unsigned long eaddr,
+			 unsigned long asid, unsigned long paddr)
 {
 	unsigned long long pteh, ptel;
 
-	/* Sign extension */
-#if (NEFF == 32)
-	pteh = (unsigned long long)(signed long long)(signed long) eaddr;
-#else
-#error "Can't sign extend more than 32 bits yet"
-#endif
+	pteh = neff_sign_extend(eaddr);
 	pteh &= PAGE_MASK;
 	pteh |= (asid << PTEH_ASID_SHIFT) | PTEH_VALID;
-#if (NEFF == 32)
-	ptel = (unsigned long long)(signed long long)(signed long) paddr;
-#else
-#error "Can't sign extend more than 32 bits yet"
-#endif
+	ptel = neff_sign_extend(paddr);
 	ptel &= PAGE_MASK;
 	ptel |= (_PAGE_CACHABLE | _PAGE_READ | _PAGE_WRITE);
 
@@ -152,5 +141,5 @@
  *
  * Teardown any existing mapping in the TLB slot @config_addr.
  */
-inline void sh64_teardown_tlb_slot(unsigned long long config_addr)
+void sh64_teardown_tlb_slot(unsigned long long config_addr)
 	__attribute__ ((alias("__flush_tlb_slot")));
diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c
index 3ce40ea..fa5a95a 100644
--- a/arch/sh/mm/tlbflush_64.c
+++ b/arch/sh/mm/tlbflush_64.c
@@ -329,22 +329,6 @@
 		goto no_context;
 }
 
-void update_mmu_cache(struct vm_area_struct * vma,
-			unsigned long address, pte_t pte)
-{
-	/*
-	 * This appears to get called once for every pte entry that gets
-	 * established => I don't think it's efficient to try refilling the
-	 * TLBs with the pages - some may not get accessed even.  Also, for
-	 * executable pages, it is impossible to determine reliably here which
-	 * TLB they should be mapped into (or both even).
-	 *
-	 * So, just do nothing here and handle faults on demand.  In the
-	 * TLBMISS handling case, the refill is now done anyway after the pte
-	 * has been fixed up, so that deals with most useful cases.
-	 */
-}
-
 void local_flush_tlb_one(unsigned long asid, unsigned long page)
 {
 	unsigned long long match, pteh=0, lpage;
@@ -353,7 +337,7 @@
 	/*
 	 * Sign-extend based on neff.
 	 */
-	lpage = (page & NEFF_SIGN) ? (page | NEFF_MASK) : page;
+	lpage = neff_sign_extend(page);
 	match = (asid << PTEH_ASID_SHIFT) | PTEH_VALID;
 	match |= lpage;
 
@@ -482,3 +466,12 @@
         /* FIXME: Optimize this later.. */
         flush_tlb_all();
 }
+
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
+{
+}
+
+void __update_cache(struct vm_area_struct *vma,
+		    unsigned long address, pte_t pte)
+{
+}