sh: sh4_flush_cache_mm() optimizations.
The i-cache flush in the case of VM_EXEC was added way back when as a
sanity measure, and in practice we only care about evicting aliases from
the d-cache. As a result, it's possible to drop the i-cache flush
completely here.
After careful profiling it's also come up that all of the work associated
with hunting down aliases and doing ranged flushing ends up generating
more overhead than simply blasting away the entire dcache, particularly
if there are many mm's that need to be iterated over. As a result of
that, just move back to flush_dcache_all() in these cases, which restores
the old behaviour, and vastly simplifies the path.
Additionally, on platforms without aliases at all, this can simply be
nopped out. Presently we have the alias check in the SH-4 specific
version, but this is true for all of the platforms, so move the check up
to a generic location. This cuts down quite a bit on superfluous cacheop
IPIs.
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index b2453bb..a5c339b 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -170,89 +170,13 @@
flush_icache_all();
}
-static void __flush_cache_mm(struct mm_struct *mm, unsigned long start,
- unsigned long end)
-{
- unsigned long d = 0, p = start & PAGE_MASK;
- unsigned long alias_mask = boot_cpu_data.dcache.alias_mask;
- unsigned long n_aliases = boot_cpu_data.dcache.n_aliases;
- unsigned long select_bit;
- unsigned long all_aliases_mask;
- unsigned long addr_offset;
- pgd_t *dir;
- pmd_t *pmd;
- pud_t *pud;
- pte_t *pte;
- int i;
-
- dir = pgd_offset(mm, p);
- pud = pud_offset(dir, p);
- pmd = pmd_offset(pud, p);
- end = PAGE_ALIGN(end);
-
- all_aliases_mask = (1 << n_aliases) - 1;
-
- do {
- if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) {
- p &= PMD_MASK;
- p += PMD_SIZE;
- pmd++;
-
- continue;
- }
-
- pte = pte_offset_kernel(pmd, p);
-
- do {
- unsigned long phys;
- pte_t entry = *pte;
-
- if (!(pte_val(entry) & _PAGE_PRESENT)) {
- pte++;
- p += PAGE_SIZE;
- continue;
- }
-
- phys = pte_val(entry) & PTE_PHYS_MASK;
-
- if ((p ^ phys) & alias_mask) {
- d |= 1 << ((p & alias_mask) >> PAGE_SHIFT);
- d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT);
-
- if (d == all_aliases_mask)
- goto loop_exit;
- }
-
- pte++;
- p += PAGE_SIZE;
- } while (p < end && ((unsigned long)pte & ~PAGE_MASK));
- pmd++;
- } while (p < end);
-
-loop_exit:
- addr_offset = 0;
- select_bit = 1;
-
- for (i = 0; i < n_aliases; i++) {
- if (d & select_bit) {
- (*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE);
- wmb();
- }
-
- select_bit <<= 1;
- addr_offset += PAGE_SIZE;
- }
-}
-
/*
* Note : (RPC) since the caches are physically tagged, the only point
* of flush_cache_mm for SH-4 is to get rid of aliases from the
* D-cache. The assumption elsewhere, e.g. flush_cache_range, is that
* lines can stay resident so long as the virtual address they were
* accessed with (hence cache set) is in accord with the physical
- * address (i.e. tag). It's no different here. So I reckon we don't
- * need to flush the I-cache, since aliases don't matter for that. We
- * should try that.
+ * address (i.e. tag). It's no different here.
*
* Caller takes mm->mmap_sem.
*/
@@ -263,33 +187,7 @@
if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT)
return;
- /*
- * If cache is only 4k-per-way, there are never any 'aliases'. Since
- * the cache is physically tagged, the data can just be left in there.
- */
- if (boot_cpu_data.dcache.n_aliases == 0)
- return;
-
- /*
- * Don't bother groveling around the dcache for the VMA ranges
- * if there are too many PTEs to make it worthwhile.
- */
- if (mm->nr_ptes >= MAX_DCACHE_PAGES)
- flush_dcache_all();
- else {
- struct vm_area_struct *vma;
-
- /*
- * In this case there are reasonably sized ranges to flush,
- * iterate through the VMA list and take care of any aliases.
- */
- for (vma = mm->mmap; vma; vma = vma->vm_next)
- __flush_cache_mm(mm, vma->vm_start, vma->vm_end);
- }
-
- /* Only touch the icache if one of the VMAs has VM_EXEC set. */
- if (mm->exec_vm)
- flush_icache_all();
+ flush_dcache_all();
}
/*
@@ -372,24 +270,10 @@
if (boot_cpu_data.dcache.n_aliases == 0)
return;
- /*
- * Don't bother with the lookup and alias check if we have a
- * wide range to cover, just blow away the dcache in its
- * entirety instead. -- PFM.
- */
- if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES)
- flush_dcache_all();
- else
- __flush_cache_mm(vma->vm_mm, start, end);
+ flush_dcache_all();
- if (vma->vm_flags & VM_EXEC) {
- /*
- * TODO: Is this required??? Need to look at how I-cache
- * coherency is assured when new programs are loaded to see if
- * this matters.
- */
+ if (vma->vm_flags & VM_EXEC)
flush_icache_all();
- }
}
/**
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
index 35c37b7..4aa9260 100644
--- a/arch/sh/mm/cache.c
+++ b/arch/sh/mm/cache.c
@@ -164,11 +164,17 @@
void flush_cache_mm(struct mm_struct *mm)
{
+ if (boot_cpu_data.dcache.n_aliases == 0)
+ return;
+
cacheop_on_each_cpu(local_flush_cache_mm, mm, 1);
}
void flush_cache_dup_mm(struct mm_struct *mm)
{
+ if (boot_cpu_data.dcache.n_aliases == 0)
+ return;
+
cacheop_on_each_cpu(local_flush_cache_dup_mm, mm, 1);
}