sh: Fix up and optimize the kmap_coherent() interface.

This fixes up the kmap_coherent()/kunmap_coherent() interface for recent
changes in both the page fault path and the shared cache flushers, and
adds some optimizations.

One of the key things to note here is that the TLB flush is deferred
until the unmap, and the call to update_mmu_cache() goes away, relying
on the regular page fault path to handle the lazy dcache writeback if
necessary.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h
index 11e4166..c29918f 100644
--- a/arch/sh/include/asm/cacheflush.h
+++ b/arch/sh/include/asm/cacheflush.h
@@ -85,7 +85,7 @@
 
 void kmap_coherent_init(void);
 void *kmap_coherent(struct page *page, unsigned long addr);
-void kunmap_coherent(void);
+void kunmap_coherent(void *kvaddr);
 
 #define PG_dcache_dirty	PG_arch_1
 
diff --git a/arch/sh/include/asm/pgtable_32.h b/arch/sh/include/asm/pgtable_32.h
index 4c4429c..c0d359c 100644
--- a/arch/sh/include/asm/pgtable_32.h
+++ b/arch/sh/include/asm/pgtable_32.h
@@ -408,13 +408,19 @@
 
 /* to find an entry in a page-table-directory. */
 #define pgd_index(address)	(((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
-#define pgd_offset(mm, address)	((mm)->pgd+pgd_index(address))
+#define pgd_offset(mm, address)	((mm)->pgd + pgd_index(address))
+#define __pgd_offset(address)	pgd_index(address)
 
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(address)	pgd_offset(&init_mm, address)
 
+#define __pud_offset(address)	(((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#define __pmd_offset(address)	(((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+
 /* Find an entry in the third-level page table.. */
 #define pte_index(address)	((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define __pte_offset(address)	pte_index(address)
+
 #define pte_offset_kernel(dir, address) \
 	((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
 #define pte_offset_map(dir, address)		pte_offset_kernel(dir, address)
diff --git a/arch/sh/include/asm/pgtable_64.h b/arch/sh/include/asm/pgtable_64.h
index c78990c..17cdbec 100644
--- a/arch/sh/include/asm/pgtable_64.h
+++ b/arch/sh/include/asm/pgtable_64.h
@@ -60,6 +60,9 @@
 /* To find an entry in a kernel PGD. */
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
+#define __pud_offset(address)	(((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#define __pmd_offset(address)	(((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+
 /*
  * PMD level access routines. Same notes as above.
  */
@@ -80,6 +83,8 @@
 #define pte_index(address) \
 		((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 
+#define __pte_offset(address)	pte_index(address)
+
 #define pte_offset_kernel(dir, addr) \
 		((pte_t *) ((pmd_val(*(dir))) & PAGE_MASK) + pte_index((addr)))
 
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
index db2b1c5..8e4a8d1 100644
--- a/arch/sh/mm/cache.c
+++ b/arch/sh/mm/cache.c
@@ -51,7 +51,7 @@
 	    !test_bit(PG_dcache_dirty, &page->flags)) {
 		void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
 		memcpy(vto, src, len);
-		kunmap_coherent();
+		kunmap_coherent(vto);
 	} else {
 		memcpy(dst, src, len);
 		if (boot_cpu_data.dcache.n_aliases)
@@ -70,7 +70,7 @@
 	    !test_bit(PG_dcache_dirty, &page->flags)) {
 		void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
 		memcpy(dst, vfrom, len);
-		kunmap_coherent();
+		kunmap_coherent(vfrom);
 	} else {
 		memcpy(dst, src, len);
 		if (boot_cpu_data.dcache.n_aliases)
@@ -89,7 +89,7 @@
 	    !test_bit(PG_dcache_dirty, &from->flags)) {
 		vfrom = kmap_coherent(from, vaddr);
 		copy_page(vto, vfrom);
-		kunmap_coherent();
+		kunmap_coherent(vfrom);
 	} else {
 		vfrom = kmap_atomic(from, KM_USER0);
 		copy_page(vto, vfrom);
@@ -150,7 +150,7 @@
 
 			kaddr = kmap_coherent(page, vmaddr);
 			__flush_wback_region((void *)kaddr, PAGE_SIZE);
-			kunmap_coherent();
+			kunmap_coherent(kaddr);
 		} else
 			__flush_wback_region((void *)addr, PAGE_SIZE);
 	}
diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
index f1c93c8..781b413 100644
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -82,8 +82,8 @@
 	pmd_t *pmd_k;
 	pte_t *pte_k;
 
-	/* Make sure we are in vmalloc area: */
-	if (!(address >= VMALLOC_START && address < VMALLOC_END))
+	/* Make sure we are in vmalloc/module/P3 area: */
+	if (!(address >= VMALLOC_START && address < P3_ADDR_MAX))
 		return -1;
 
 	/*
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 0a9b4d8..edc842f 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -106,27 +106,31 @@
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	int pgd_idx;
+	pte_t *pte;
+	int i, j, k;
 	unsigned long vaddr;
 
-	vaddr = start & PMD_MASK;
-	end = (end + PMD_SIZE - 1) & PMD_MASK;
-	pgd_idx = pgd_index(vaddr);
-	pgd = pgd_base + pgd_idx;
+	vaddr = start;
+	i = __pgd_offset(vaddr);
+	j = __pud_offset(vaddr);
+	k = __pmd_offset(vaddr);
+	pgd = pgd_base + i;
 
-	for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
-		BUG_ON(pgd_none(*pgd));
-		pud = pud_offset(pgd, 0);
-		BUG_ON(pud_none(*pud));
-		pmd = pmd_offset(pud, 0);
-
-		if (!pmd_present(*pmd)) {
-			pte_t *pte_table;
-			pte_table = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
-			pmd_populate_kernel(&init_mm, pmd, pte_table);
+	for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
+		pud = (pud_t *)pgd;
+		for ( ; (j < PTRS_PER_PUD) && (vaddr != end); pud++, j++) {
+			pmd = (pmd_t *)pud;
+			for (; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) {
+				if (pmd_none(*pmd)) {
+					pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+					pmd_populate_kernel(&init_mm, pmd, pte);
+					BUG_ON(pte != pte_offset_kernel(pmd, 0));
+				}
+				vaddr += PMD_SIZE;
+			}
+			k = 0;
 		}
-
-		vaddr += PMD_SIZE;
+		j = 0;
 	}
 }
 #endif	/* CONFIG_MMU */
@@ -137,7 +141,7 @@
 void __init paging_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
-	unsigned long vaddr;
+	unsigned long vaddr, end;
 	int nid;
 
 	/* We don't need to map the kernel through the TLB, as
@@ -155,7 +159,8 @@
 	 * pte's will be filled in by __set_fixmap().
 	 */
 	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
-	page_table_range_init(vaddr, 0, swapper_pg_dir);
+	end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
+	page_table_range_init(vaddr, end, swapper_pg_dir);
 
 	kmap_coherent_init();
 
diff --git a/arch/sh/mm/kmap.c b/arch/sh/mm/kmap.c
index 3eecf0d..c52cd8c 100644
--- a/arch/sh/mm/kmap.c
+++ b/arch/sh/mm/kmap.c
@@ -24,9 +24,6 @@
 {
 	unsigned long vaddr;
 
-	if (!boot_cpu_data.dcache.n_aliases)
-		return;
-
 	/* cache the first coherent kmap pte */
 	vaddr = __fix_to_virt(FIX_CMAP_BEGIN);
 	kmap_coherent_pte = kmap_get_fixmap_pte(vaddr);
@@ -35,30 +32,31 @@
 void *kmap_coherent(struct page *page, unsigned long addr)
 {
 	enum fixed_addresses idx;
-	unsigned long vaddr, flags;
-	pte_t pte;
+	unsigned long vaddr;
 
 	BUG_ON(test_bit(PG_dcache_dirty, &page->flags));
 
-	inc_preempt_count();
+	pagefault_disable();
 
-	idx = (addr & current_cpu_data.dcache.alias_mask) >> PAGE_SHIFT;
-	vaddr = __fix_to_virt(FIX_CMAP_END - idx);
-	pte = mk_pte(page, PAGE_KERNEL);
+	idx = FIX_CMAP_END -
+		((addr & current_cpu_data.dcache.alias_mask) >> PAGE_SHIFT);
+	vaddr = __fix_to_virt(idx);
 
-	local_irq_save(flags);
-	flush_tlb_one(get_asid(), vaddr);
-	local_irq_restore(flags);
-
-	update_mmu_cache(NULL, vaddr, pte);
-
-	set_pte(kmap_coherent_pte - (FIX_CMAP_END - idx), pte);
+	BUG_ON(!pte_none(*(kmap_coherent_pte - idx)));
+	set_pte(kmap_coherent_pte - idx, mk_pte(page, PAGE_KERNEL));
 
 	return (void *)vaddr;
 }
 
-void kunmap_coherent(void)
+void kunmap_coherent(void *kvaddr)
 {
-	dec_preempt_count();
-	preempt_check_resched();
+	if (kvaddr >= (void *)FIXADDR_START) {
+		unsigned long vaddr = (unsigned long)kvaddr & PAGE_MASK;
+		enum fixed_addresses idx = __virt_to_fix(vaddr);
+
+		pte_clear(&init_mm, vaddr, kmap_coherent_pte - idx);
+		local_flush_tlb_one(get_asid(), vaddr);
+	}
+
+	pagefault_enable();
 }
diff --git a/arch/sh/mm/nommu.c b/arch/sh/mm/nommu.c
index 51b5403..ac16c05 100644
--- a/arch/sh/mm/nommu.c
+++ b/arch/sh/mm/nommu.c
@@ -81,7 +81,7 @@
 	return NULL;
 }
 
-void kunmap_coherent(void)
+void kunmap_coherent(void *kvaddr)
 {
 	BUG();
 }