sh: Improve performance of SH4 versions of copy/clear_user_highpage

The previous implementations of clear_user_highpage() and
copy_user_highpage() checked whether there was a D-cache aliasing
issue between the user and kernel mappings of a page; if there was,
they always performed a flush with write-back on the dirtied kernel
alias.
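
Concretely, the old copy path always ended with a purge (write-back
plus invalidate, SH-4 "ocbp") of the kernel alias, as the removed
lines in the diff below show:

	if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK))
		__flush_purge_region(vto, PAGE_SIZE);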

However, as we now have the ability to map a page into kernel space
with the same cache colour as the user mapping, there is no need to
write back this data.
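
In outline, the new copy path becomes (a sketch taken from the
copy_user_highpage() hunk below; vto and vfrom are the plain
kmap_atomic() mappings of the two pages):

	if (boot_cpu_data.dcache.n_aliases && page_mapped(from) &&
	    !test_bit(PG_dcache_dirty, &from->flags)) {
		/* Map @to at a kernel address of the user's colour. */
		void *vto_coloured = kmap_coherent(to, vaddr);
		copy_page(vto_coloured, vfrom);
		kunmap_coherent(vto_coloured);
	} else
		copy_page(vto, vfrom);

Because the colour-matched alias and the user mapping index the same
cache lines, the copied data is visible through the user mapping with
no write-back needed.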

Currently we also invalidate the kernel alias as a precaution;
however, I'm not sure whether this is actually required.
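
The precaution in question is an invalidate without write-back
(__flush_invalidate_region(), SH-4 "ocbi") rather than the old purge,
e.g. at the top of the new clear_user_highpage():

	if (pages_do_alias((unsigned long)kaddr, vaddr & PAGE_MASK)) {
		/* Kernel alias may have modified data in the cache. */
		__flush_invalidate_region(kaddr, PAGE_SIZE);
		...
	}

Any stale lines held by the kernel alias are simply dropped, so they
can never be evicted on top of the newly written data.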

Also correct the definition of FIX_CMAP_END so that the mappings created
by kmap_coherent() are actually at the correct colour.
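
To see that the new definition lines things up, here is a small
userspace sanity check (PAGE_SHIFT, FIXADDR_TOP and NR_CPUS == 1 are
illustrative assumptions here, not values taken from the kernel
headers), using the generic __fix_to_virt() formula:

	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define FIX_N_COLOURS	8		/* SH-4 D-cache colours */
	#define FIXADDR_TOP	0xdffff000UL	/* assumed for illustration */

	#define FIX_CMAP_BEGIN	0
	/* New definition (NR_CPUS == 1). Without the -1, every mapping
	 * below comes out one colour off. */
	#define FIX_CMAP_END	(FIX_CMAP_BEGIN + FIX_N_COLOURS - 1)

	#define __fix_to_virt(x) \
		(FIXADDR_TOP - ((unsigned long)(x) << PAGE_SHIFT))
	#define colour_of(a)	(((a) >> PAGE_SHIFT) & (FIX_N_COLOURS - 1))

	int main(void)
	{
		unsigned long n;

		/* The two colours should agree for every n. */
		for (n = 0; n < FIX_N_COLOURS; n++)
			printf("n=%lu: fixmap colour %lu, page colour %lu\n",
			       n, colour_of(__fix_to_virt(FIX_CMAP_END - n)),
			       colour_of(n << PAGE_SHIFT));
		return 0;
	}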

Signed-off-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
diff --git a/arch/sh/include/asm/fixmap.h b/arch/sh/include/asm/fixmap.h
index 76c5a30..5ac1e40 100644
--- a/arch/sh/include/asm/fixmap.h
+++ b/arch/sh/include/asm/fixmap.h
@@ -46,9 +46,15 @@
  * fix-mapped?
  */
 enum fixed_addresses {
+	/*
+	 * The FIX_CMAP entries are used by kmap_coherent() to get virtual
+	 * addresses which are of a known colour, and so their values are
+	 * important. __fix_to_virt(FIX_CMAP_END - n) must give an address
+	 * which has the same colour as the address (n << PAGE_SHIFT).
+	 */
 #define FIX_N_COLOURS 8
 	FIX_CMAP_BEGIN,
-	FIX_CMAP_END = FIX_CMAP_BEGIN + (FIX_N_COLOURS * NR_CPUS),
+	FIX_CMAP_END = FIX_CMAP_BEGIN + (FIX_N_COLOURS * NR_CPUS) - 1,
 	FIX_UNCACHED,
 #ifdef CONFIG_HIGHMEM
 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
index e9415d3..997c7e4 100644
--- a/arch/sh/mm/cache.c
+++ b/arch/sh/mm/cache.c
@@ -46,6 +46,18 @@
 	preempt_enable();
 }
 
+/*
+ * copy_to_user_page
+ * @vma: vm_area_struct holding the pages
+ * @page: struct page
+ * @vaddr: user space address
+ * @dst: address of page in kernel space (possibly from kmap)
+ * @src: source address in kernel logical memory
+ * @len: length of data in bytes (may be less than PAGE_SIZE)
+ *
+ * Copy data into the address space of a process other than the current
+ * process (e.g. for ptrace).
+ */
 void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 		       unsigned long vaddr, void *dst, const void *src,
 		       unsigned long len)
@@ -81,28 +93,49 @@
 	}
 }
 
+/*
+ * copy_user_highpage
+ * @to: destination page
+ * @from: source page
+ * @vaddr: address of pages in user address space
+ * @vma: vm_area_struct holding the pages
+ *
+ * This is used in the COW implementation to copy data from page @from
+ * to page @to. @from was previously mapped at @vaddr, and @to will be.
+ * As this is used only in the COW implementation, this means that the
+ * source is unmodified, and so we don't have to worry about cache
+ * aliasing on that side.
+ */
+#ifdef CONFIG_HIGHMEM
+/*
+ * If we ever have a real highmem system, this code will need fixing
+ * (as will clear_user_page/clear_user_highpage), because the kmap potentially
+ * creates another alias risk.
+ */
+#error This code is broken with real HIGHMEM
+#endif
 void copy_user_highpage(struct page *to, struct page *from,
 			unsigned long vaddr, struct vm_area_struct *vma)
 {
 	void *vfrom, *vto;
 
 	vto = kmap_atomic(to, KM_USER1);
+	vfrom = kmap_atomic(from, KM_USER0);
+
+	if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK))
+		__flush_invalidate_region(vto, PAGE_SIZE);
 
 	if (boot_cpu_data.dcache.n_aliases && page_mapped(from) &&
 	    !test_bit(PG_dcache_dirty, &from->flags)) {
-		vfrom = kmap_coherent(from, vaddr);
+		void *vto_coloured = kmap_coherent(to, vaddr);
+		copy_page(vto_coloured, vfrom);
+		kunmap_coherent(vto_coloured);
+	} else
 		copy_page(vto, vfrom);
-		kunmap_coherent(vfrom);
-	} else {
-		vfrom = kmap_atomic(from, KM_USER0);
-		copy_page(vto, vfrom);
-		kunmap_atomic(vfrom, KM_USER0);
-	}
 
-	if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK))
-		__flush_purge_region(vto, PAGE_SIZE);
-
+	kunmap_atomic(vfrom, KM_USER0);
 	kunmap_atomic(vto, KM_USER1);
+
 	/* Make sure this page is cleared on other CPU's too before using it */
 	smp_wmb();
 }
@@ -112,10 +145,17 @@
 {
 	void *kaddr = kmap_atomic(page, KM_USER0);
 
-	clear_page(kaddr);
+	if (pages_do_alias((unsigned long)kaddr, vaddr & PAGE_MASK)) {
+		void *vto;
 
-	if (pages_do_alias((unsigned long)kaddr, vaddr & PAGE_MASK))
-		__flush_purge_region(kaddr, PAGE_SIZE);
+		/* Kernel alias may have modified data in the cache. */
+		__flush_invalidate_region(kaddr, PAGE_SIZE);
+
+		vto = kmap_coherent(page, vaddr);
+		clear_page(vto);
+		kunmap_coherent(vto);
+	} else
+		clear_page(kaddr);
 
 	kunmap_atomic(kaddr, KM_USER0);
 }