/*
 * arch/sh/mm/cache-sh5.c
 *
 * Copyright (C) 2000, 2001 Paolo Alberelli
 * Copyright (C) 2002 Benedict Gaster
 * Copyright (C) 2003 Richard Curnow
 * Copyright (C) 2003 - 2008 Paul Mundt
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <asm/tlb.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>

extern void __weak sh4__flush_region_init(void);

/* Wired TLB entry for the D-cache */
static unsigned long long dtlb_cache_slot;

/*
 * The following group of functions deal with mapping and unmapping a
 * temporary page into a DTLB slot that has been set aside for exclusive
 * use.
 */
static inline void
sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid,
			   unsigned long paddr)
{
	sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
}

static inline void sh64_teardown_dtlb_cache_slot(void)
{
	sh64_teardown_tlb_slot(dtlb_cache_slot);
}

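/*
 * Invalidate the entire contents of the I-cache: read the ICCR0 cache
 * configuration register with getcfg, set the invalidate bit (ICCR0_ICI),
 * write it back with putcfg, then issue synci to synchronize the
 * instruction stream with the new cache state.
 */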
static inline void sh64_icache_inv_all(void)
{
	unsigned long long addr, flag, data;

	addr = ICCR0;
	flag = ICCR0_ICI;
	data = 0;

	/* Without %1 it gets inexplicably wrong */
	__asm__ __volatile__ (
		"getcfg %3, 0, %0\n\t"
		"or %0, %2, %0\n\t"
		"putcfg %3, 0, %0\n\t"
		"synci"
		: "=&r" (data)
		: "0" (data), "r" (flag), "r" (addr));
}

static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
{
	/* Invalidate range of addresses [start,end] from the I-cache, where
	 * the addresses lie in the kernel superpage. */

	unsigned long long ullend, addr, aligned_start;
	aligned_start = (unsigned long long)(signed long long)(signed long) start;
	addr = L1_CACHE_ALIGN(aligned_start);
	ullend = (unsigned long long) (signed long long) (signed long) end;

	while (addr <= ullend) {
		__asm__ __volatile__ ("icbi %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
}

static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
{
	/* If we get called, we know that vma->vm_flags contains VM_EXEC.
	   Also, eaddr is page-aligned. */
	unsigned int cpu = smp_processor_id();
	unsigned long long addr, end_addr;
	unsigned long running_asid, vma_asid;
	addr = eaddr;
	end_addr = addr + PAGE_SIZE;

	/* Check whether we can use the current ASID for the I-cache
	   invalidation. For example, if we're called via
	   access_process_vm->flush_cache_page->here, (e.g. when reading from
	   /proc), 'running_asid' will be that of the reader, not of the
	   victim.

	   Also, note the risk that we might get pre-empted between the ASID
	   compare and blocking IRQs, and before we regain control, the
	   pid->ASID mapping changes. However, the whole cache will get
	   invalidated when the mapping is renewed, so the worst that can
	   happen is that the loop below ends up invalidating somebody else's
	   cache entries.
	*/

	running_asid = get_asid();
	vma_asid = cpu_asid(cpu, vma->vm_mm);
	if (running_asid != vma_asid)
		switch_and_save_asid(vma_asid);

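	/*
	 * One icbi per cache line; the 0/32/64/96 offsets below assume
	 * 32-byte I-cache lines, so each iteration covers 128 bytes.
	 */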
	while (addr < end_addr) {
		/* Worth unrolling a little */
		__asm__ __volatile__("icbi %0, 0" : : "r" (addr));
		__asm__ __volatile__("icbi %0, 32" : : "r" (addr));
		__asm__ __volatile__("icbi %0, 64" : : "r" (addr));
		__asm__ __volatile__("icbi %0, 96" : : "r" (addr));
		addr += 128;
	}

	if (running_asid != vma_asid)
		switch_and_save_asid(running_asid);
}

static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
			unsigned long start, unsigned long end)
{
	/* Used for invalidating big chunks of I-cache, i.e. assume the range
	   is whole pages. If 'start' or 'end' is not page aligned, the code
	   is conservative and invalidates to the ends of the enclosing pages.
	   This is functionally OK, just a performance loss. */

	/* See the comments below in sh64_dcache_purge_user_range() regarding
	   the choice of algorithm. However, for the I-cache option (2) isn't
	   available because there are no physical tags so aliases can't be
	   resolved. The icbi instruction has to be used through the user
	   mapping. Because icbi is cheaper than ocbp on a cache hit, it
	   would be cheaper to use the selective code for a large range than is
	   possible with the D-cache. Just assume 64 for now as a working
	   figure.
	   */
	int n_pages;

	if (!mm)
		return;

	n_pages = ((end - start) >> PAGE_SHIFT);
	if (n_pages >= 64) {
		sh64_icache_inv_all();
	} else {
		unsigned long aligned_start;
		unsigned long eaddr;
		unsigned long after_last_page_start;
		unsigned long mm_asid, current_asid;

		mm_asid = cpu_asid(smp_processor_id(), mm);
		current_asid = get_asid();

		if (mm_asid != current_asid)
			switch_and_save_asid(mm_asid);

		aligned_start = start & PAGE_MASK;
		after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);

		while (aligned_start < after_last_page_start) {
			struct vm_area_struct *vma;
			unsigned long vma_end;
			vma = find_vma(mm, aligned_start);
			if (!vma || (aligned_start < vma->vm_start)) {
				/* Avoid getting stuck in an error condition */
				aligned_start += PAGE_SIZE;
				continue;
			}
			vma_end = vma->vm_end;
			if (vma->vm_flags & VM_EXEC) {
				/* Executable */
				eaddr = aligned_start;
				while (eaddr < vma_end) {
					sh64_icache_inv_user_page(vma, eaddr);
					eaddr += PAGE_SIZE;
				}
			}
			aligned_start = vma->vm_end; /* Skip to start of next region */
		}

		if (mm_asid != current_asid)
			switch_and_save_asid(current_asid);
	}
}

static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
{
	/* The icbi instruction never raises ITLBMISS. i.e. if there's not a
	   cache hit on the virtual tag the instruction ends there, without a
	   TLB lookup. */

	unsigned long long aligned_start;
	unsigned long long ull_end;
	unsigned long long addr;

	ull_end = end;

	/* Just invalidate over the range using the natural addresses. TLB
	   miss handling will be OK (TBC). Since it's for the current process,
	   either we're already in the right ASID context, or the ASIDs have
	   been recycled since we were last active in which case we might just
	   invalidate another process's I-cache entries : no worries, just a
	   performance drop for him. */
	aligned_start = L1_CACHE_ALIGN(start);
	addr = aligned_start;
	while (addr < ull_end) {
		__asm__ __volatile__ ("icbi %0, 0" : : "r" (addr));
		__asm__ __volatile__ ("nop");
		__asm__ __volatile__ ("nop");
		addr += L1_CACHE_BYTES;
	}
}

/* Buffer used as the target of alloco instructions to purge data from cache
   sets by natural eviction. -- RPC */
#define DUMMY_ALLOCO_AREA_SIZE ((L1_CACHE_BYTES << 10) + (1024 * 4))
static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };

static inline void sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
{
	/* Purge all ways in a particular block of sets, specified by the base
	   set number and number of sets. Can handle wrap-around, if that's
	   needed. */

	int dummy_buffer_base_set;
	unsigned long long eaddr, eaddr0, eaddr1;
	int j;
	int set_offset;

	dummy_buffer_base_set = ((int)&dummy_alloco_area &
				 cpu_data->dcache.entry_mask) >>
				 cpu_data->dcache.entry_shift;
	set_offset = sets_to_purge_base - dummy_buffer_base_set;

	for (j = 0; j < n_sets; j++, set_offset++) {
		set_offset &= (cpu_data->dcache.sets - 1);
		eaddr0 = (unsigned long long)dummy_alloco_area +
			(set_offset << cpu_data->dcache.entry_shift);

		/*
		 * Do one alloco which hits the required set per cache
		 * way. For write-back mode, this will purge the #ways
		 * resident lines. There's little point unrolling this
		 * loop because the allocos stall more if they're too
		 * close together.
		 */
		eaddr1 = eaddr0 + cpu_data->dcache.way_size *
				  cpu_data->dcache.ways;

		for (eaddr = eaddr0; eaddr < eaddr1;
		     eaddr += cpu_data->dcache.way_size) {
			__asm__ __volatile__ ("alloco %0, 0" : : "r" (eaddr));
			__asm__ __volatile__ ("synco"); /* TAKum03020 */
		}

		eaddr1 = eaddr0 + cpu_data->dcache.way_size *
				  cpu_data->dcache.ways;

		for (eaddr = eaddr0; eaddr < eaddr1;
		     eaddr += cpu_data->dcache.way_size) {
			/*
			 * Load from each address. Required because
			 * alloco is a NOP if the cache is write-through.
			 */
			if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
				__raw_readb((unsigned long)eaddr);
		}
	}

	/*
	 * Don't use OCBI to invalidate the lines. That costs cycles
	 * directly. If the dummy block is just left resident, it will
	 * naturally get evicted as required.
	 */
}

/*
 * Purge the entire contents of the dcache. The most efficient way to
 * achieve this is to use alloco instructions on a region of unused
 * memory equal in size to the cache, thereby causing the current
 * contents to be discarded by natural eviction. The alternative, namely
 * reading every tag, setting up a mapping for the corresponding page and
 * doing an OCBP for the line, would be much more expensive.
 */
static void sh64_dcache_purge_all(void)
{
	sh64_dcache_purge_sets(0, cpu_data->dcache.sets);
}

/* Assumes this address (+ (2**n_synbits) pages up from it) aren't used for
   anything else in the kernel */
#define MAGIC_PAGE0_START 0xffffffffec000000ULL

/* Purge the physical page 'paddr' from the cache. It's known that any
 * cache lines requiring attention have the same page colour as the
 * address 'eaddr'.
 *
 * This relies on the fact that the D-cache matches on physical tags when
 * no virtual tag matches. So we create an alias for the original page
 * and purge through that. (Alternatively, we could have done this by
 * switching ASID to match the original mapping and purged through that,
 * but that involves ASID switching cost + probably a TLBMISS + refill
 * anyway.)
 */
static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr,
						unsigned long eaddr)
{
	unsigned long long magic_page_start;
	unsigned long long magic_eaddr, magic_eaddr_end;

	magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);

	/* As long as the kernel is not pre-emptible, this doesn't need to be
	   under cli/sti. */
	sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);

	magic_eaddr = magic_page_start;
	magic_eaddr_end = magic_eaddr + PAGE_SIZE;

	while (magic_eaddr < magic_eaddr_end) {
		/* Little point in unrolling this loop - the OCBPs are blocking
		   and won't go any quicker (i.e. the loop overhead is parallel
		   to part of the OCBP execution.) */
		__asm__ __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
		magic_eaddr += L1_CACHE_BYTES;
	}

	sh64_teardown_dtlb_cache_slot();
}

/*
 * Purge a page given its physical start address, by creating a temporary
 * 1 page mapping and purging across that. Even if we know the virtual
 * address (& vma or mm) of the page, the method here is more elegant
 * because it avoids issues of coping with page faults on the purge
 * instructions (i.e. no special-case code required in the critical path
 * in the TLB miss handling).
 */
static void sh64_dcache_purge_phy_page(unsigned long paddr)
{
	unsigned long long eaddr_start, eaddr, eaddr_end;
	int i;

	/* As long as the kernel is not pre-emptible, this doesn't need to be
	   under cli/sti. */
	eaddr_start = MAGIC_PAGE0_START;
	for (i = 0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
		sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);

		eaddr = eaddr_start;
		eaddr_end = eaddr + PAGE_SIZE;
		while (eaddr < eaddr_end) {
			__asm__ __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
			eaddr += L1_CACHE_BYTES;
		}

		sh64_teardown_dtlb_cache_slot();
		eaddr_start += PAGE_SIZE;
	}
}

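/*
 * Purge the D-cache for the user range [addr, end). The caller
 * guarantees the range lies within a single page table page (see
 * sh64_dcache_purge_user_range() below), so one pgd/pud/pmd walk
 * suffices; each present pte is then purged through a coloured alias
 * of its physical page.
 */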
static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
				unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	pte_t entry;
	spinlock_t *ptl;
	unsigned long paddr;

	if (!mm)
		return; /* No way to find physical address of page */

	pgd = pgd_offset(mm, addr);
	if (pgd_bad(*pgd))
		return;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud) || pud_bad(*pud))
		return;

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	do {
		entry = *pte;
		if (pte_none(entry) || !pte_present(entry))
			continue;
		paddr = pte_val(entry) & PAGE_MASK;
		sh64_dcache_purge_coloured_phy_page(paddr, addr);
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);
}

/*
 * There are at least 5 choices for the implementation of this, with
 * pros (+), cons(-), comments(*):
 *
 * 1. ocbp each line in the range through the original user's ASID
 *    + no lines spuriously evicted
 *    - tlbmiss handling (must either handle faults on demand => extra
 *	special-case code in tlbmiss critical path), or map the page in
 *	advance (=> flush_tlb_range in advance to avoid multiple hits)
 *    - ASID switching
 *    - expensive for large ranges
 *
 * 2. temporarily map each page in the range to a special effective
 *    address and ocbp through the temporary mapping; relies on the
 *    fact that SH-5 OCB* always do TLB lookup and match on ptags (they
 *    never look at the etags)
 *    + no spurious evictions
 *    - expensive for large ranges
 *    * surely cheaper than (1)
 *
 * 3. walk all the lines in the cache, check the tags, if a match
 *    occurs create a page mapping to ocbp the line through
 *    + no spurious evictions
 *    - tag inspection overhead
 *    - (especially for small ranges)
 *    - potential cost of setting up/tearing down page mapping for
 *	every line that matches the range
 *    * cost partly independent of range size
 *
 * 4. walk all the lines in the cache, check the tags, if a match
 *    occurs use 4 * alloco to purge the line (+3 other probably
 *    innocent victims) by natural eviction
 *    + no tlb mapping overheads
 *    - spurious evictions
 *    - tag inspection overhead
 *
 * 5. implement like flush_cache_all
 *    + no tag inspection overhead
 *    - spurious evictions
 *    - bad for small ranges
 *
 * (1) can be ruled out as more expensive than (2). (2) appears best
 * for small ranges. The choice between (3), (4) and (5) for large
 * ranges and the range size for the large/small boundary need
 * benchmarking to determine.
 *
 * For now use approach (2) for small ranges and (5) for large ones.
 */
static void sh64_dcache_purge_user_range(struct mm_struct *mm,
				unsigned long start, unsigned long end)
{
	int n_pages = ((end - start) >> PAGE_SHIFT);

	if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) {
		sh64_dcache_purge_all();
	} else {
		/* Small range, covered by a single page table page */
		start &= PAGE_MASK;	/* should already be so */
		end = PAGE_ALIGN(end);	/* should already be so */
		sh64_dcache_purge_user_pages(mm, start, end);
	}
}

/*
 * Invalidate the entire contents of both caches, after writing back to
 * memory any dirty data from the D-cache.
 */
static void sh5_flush_cache_all(void *unused)
{
	sh64_dcache_purge_all();
	sh64_icache_inv_all();
}

/*
 * Invalidate an entire user-address space from both caches, after
 * writing back dirty data (e.g. for shared mmap etc).
 *
 * This could be coded selectively by inspecting all the tags then
 * doing 4*alloco on any set containing a match (as for
 * flush_cache_range), but fork/exit/execve (where this is called from)
 * are expensive anyway.
 *
 * Have to do a purge here, despite the comments re I-cache below.
 * There could be odd-coloured dirty data associated with the mm still
 * in the cache - if this gets written out through natural eviction
 * after the kernel has reused the page there will be chaos.
 *
 * The mm being torn down won't ever be active again, so any Icache
 * lines tagged with its ASID won't be visible for the rest of the
 * lifetime of this ASID cycle. Before the ASID gets reused, there
 * will be a flush_cache_all. Hence we don't need to touch the
 * I-cache. This is similar to the lack of action needed in
 * flush_tlb_mm - see fault.c.
 */
static void sh5_flush_cache_mm(void *unused)
{
	sh64_dcache_purge_all();
}

/*
 * Invalidate (from both caches) the range [start,end) of virtual
 * addresses from the user address space specified by mm, after writing
 * back any dirty data.
 *
 * Note, 'end' is 1 byte beyond the end of the range to flush.
 */
static void sh5_flush_cache_range(void *args)
{
	struct flusher_data *data = args;
	struct vm_area_struct *vma;
	unsigned long start, end;

	vma = data->vma;
	start = data->addr1;
	end = data->addr2;

	sh64_dcache_purge_user_range(vma->vm_mm, start, end);
	sh64_icache_inv_user_page_range(vma->vm_mm, start, end);
}

/*
 * Invalidate any entries in either cache for the vma within the user
 * address space vma->vm_mm for the page starting at virtual address
 * 'eaddr'. This seems to be used primarily in breaking COW. Note,
 * the I-cache must be searched too in case the page in question is
 * both writable and being executed from (e.g. stack trampolines.)
 *
 * Note, this is called with pte lock held.
 */
static void sh5_flush_cache_page(void *args)
{
	struct flusher_data *data = args;
	struct vm_area_struct *vma;
	unsigned long eaddr, pfn;

	vma = data->vma;
	eaddr = data->addr1;
	pfn = data->addr2;

	sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);

	if (vma->vm_flags & VM_EXEC)
		sh64_icache_inv_user_page(vma, eaddr);
}

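/*
 * Write back and invalidate any D-cache lines for the given page.
 * Purging by physical address covers every alias colour the page may
 * have been mapped at; the wmb() keeps the writeback ordered ahead of
 * whatever the caller does with the page next.
 */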
static void sh5_flush_dcache_page(void *page)
{
	sh64_dcache_purge_phy_page(page_to_phys(page));
	wmb();
}

/*
 * Flush the range [start,end] of kernel virtual address space from
 * the I-cache. The corresponding range must be purged from the
 * D-cache also because the SH-5 doesn't have cache snooping between
 * the caches. The addresses will be visible through the superpage
 * mapping, therefore it's guaranteed that there are no cache entries for
 * the range in cache sets of the wrong colour.
 */
static void sh5_flush_icache_range(void *args)
{
	struct flusher_data *data = args;
	unsigned long start, end;

	start = data->addr1;
	end = data->addr2;

	__flush_purge_region((void *)start, end);
	wmb();
	sh64_icache_inv_kernel_range(start, end);
}

/*
 * For the address range [start,end), write back the data from the
 * D-cache and invalidate the corresponding region of the I-cache for the
 * current process. Used to flush signal trampolines on the stack to
 * make them executable.
 */
static void sh5_flush_cache_sigtramp(void *vaddr)
{
	unsigned long end = (unsigned long)vaddr + L1_CACHE_BYTES;

	__flush_wback_region(vaddr, L1_CACHE_BYTES);
	wmb();
	sh64_icache_inv_current_user_range((unsigned long)vaddr, end);
}

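/*
 * Hook up the SH-5 implementations of the local cache flush operations,
 * reserve the wired DTLB entry used for purging the D-cache by alias,
 * and initialise the shared region flush helpers (sh4__flush_region_init).
 */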
void __init sh5_cache_init(void)
{
	local_flush_cache_all		= sh5_flush_cache_all;
	local_flush_cache_mm		= sh5_flush_cache_mm;
	local_flush_cache_dup_mm	= sh5_flush_cache_mm;
	local_flush_cache_page		= sh5_flush_cache_page;
	local_flush_cache_range		= sh5_flush_cache_range;
	local_flush_dcache_page		= sh5_flush_dcache_page;
	local_flush_icache_range	= sh5_flush_icache_range;
	local_flush_cache_sigtramp	= sh5_flush_cache_sigtramp;

	/* Reserve a slot for dcache colouring in the DTLB */
	dtlb_cache_slot			= sh64_get_wired_dtlb_entry();

	sh4__flush_region_init();
}