/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * arch/sh64/mm/cache.c
 *
 * Original version Copyright (C) 2000, 2001  Paolo Alberelli
 * Second version Copyright (C) benedict.gaster@superh.com 2002
 * Third version Copyright Richard.Curnow@superh.com 2003
 * Hacks to third version Copyright (C) 2003 Paul Mundt
 */

/****************************************************************************/

#include <linux/config.h>
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/threads.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/tlb.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h> /* for flush_itlb_range */

#include <linux/proc_fs.h>

/* This function is in entry.S */
extern unsigned long switch_and_save_asid(unsigned long new_asid);

/* Wired TLB entry for the D-cache */
static unsigned long long dtlb_cache_slot;

/**
 * sh64_cache_init()
 *
 * This is pretty much just a straightforward clone of the SH
 * detect_cpu_and_cache_system().
 *
 * This function is responsible for setting up all of the cache
 * info dynamically as well as taking care of CPU probing and
 * setting up the relevant subtype data.
 *
 * FIXME: For the time being, we only really support the SH5-101
 * out of the box, and don't support dynamic probing for things
 * like the SH5-103 or even cut2 of the SH5-101. Implement this
 * later!
 */
int __init sh64_cache_init(void)
{
	/*
	 * First, setup some sane values for the I-cache.
	 */
	cpu_data->icache.ways		= 4;
	cpu_data->icache.sets		= 256;
	cpu_data->icache.linesz		= L1_CACHE_BYTES;

	/*
	 * FIXME: This can probably be cleaned up a bit as well.. for example,
	 * do we really need the way shift _and_ the way_step_shift ?? Judging
	 * by the existing code, I would guess no.. is there any valid reason
	 * why we need to be tracking this around?
	 */
	cpu_data->icache.way_shift	= 13;
	cpu_data->icache.entry_shift	= 5;
	cpu_data->icache.set_shift	= 4;
	cpu_data->icache.way_step_shift	= 16;
	cpu_data->icache.asid_shift	= 2;

	/*
	 * way offset = cache size / associativity, so just don't factor in
	 * associativity in the first place..
	 */
	cpu_data->icache.way_ofs	= cpu_data->icache.sets *
					  cpu_data->icache.linesz;

	cpu_data->icache.asid_mask	= 0x3fc;
	cpu_data->icache.idx_mask	= 0x1fe0;
	cpu_data->icache.epn_mask	= 0xffffe000;
	cpu_data->icache.flags		= 0;

	/*
	 * Next, setup some sane values for the D-cache.
	 *
	 * On the SH5, these are pretty consistent with the I-cache settings,
	 * so we just copy over the existing definitions.. these can be fixed
	 * up later, especially if we add runtime CPU probing.
	 *
	 * Though in the meantime it saves us from having to duplicate all of
	 * the above definitions..
	 */
	cpu_data->dcache		= cpu_data->icache;

	/*
	 * Setup any cache-related flags here
	 */
#if defined(CONFIG_DCACHE_WRITE_THROUGH)
	set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags));
#elif defined(CONFIG_DCACHE_WRITE_BACK)
	set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags));
#endif

	/*
	 * We also need to reserve a slot for the D-cache in the DTLB, so we
	 * do this now ..
	 */
	dtlb_cache_slot			= sh64_get_wired_dtlb_entry();

	return 0;
}
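
/*
 * Quick sanity check of the geometry above (assuming the usual 32-byte SH-5
 * cache line, i.e. L1_CACHE_BYTES == 32, matching entry_shift == 5):
 * 4 ways x 256 sets x 32 bytes = 32KB per cache, each way covering
 * way_ofs = 256 * 32 = 8192 bytes = 1 << 13, hence way_shift == 13, and
 * idx_mask == 0x1fe0 selecting address bits [12:5] as the set index.
 */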
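/* When the D-cache is configured out, the D-cache purge/writeback primitives
   below reduce to no-ops, so the externally callable API further down only
   ever touches the I-cache. */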
#ifdef CONFIG_DCACHE_DISABLED
#define sh64_dcache_purge_all()					do { } while (0)
#define sh64_dcache_purge_coloured_phy_page(paddr, eaddr)	do { } while (0)
#define sh64_dcache_purge_user_range(mm, start, end)		do { } while (0)
#define sh64_dcache_purge_phy_page(paddr)			do { } while (0)
#define sh64_dcache_purge_virt_page(mm, eaddr)			do { } while (0)
#define sh64_dcache_purge_kernel_range(start, end)		do { } while (0)
#define sh64_dcache_wback_current_user_range(start, end)	do { } while (0)
#endif

/*##########################################################################*/

/* From here onwards, a rewrite of the implementation,
   by Richard.Curnow@superh.com.

   The major changes compared to the old version are:
   1. use more selective purging through OCBP instead of using ALLOCO to purge
      by natural replacement.  This avoids purging out unrelated cache lines
      that happen to be in the same set.
   2. exploit the APIs copy_user_page and clear_user_page better
   3. be more selective about I-cache purging, in particular use invalidate_all
      more sparingly.

   */

/*##########################################################################
			    SUPPORT FUNCTIONS
  ##########################################################################*/

/****************************************************************************/
/* The following group of functions deal with mapping and unmapping a temporary
   page into the DTLB slot that has been set aside for our exclusive use. */
/* In order to accomplish this, we use the generic interface for adding and
   removing a wired slot entry as defined in arch/sh64/mm/tlb.c */
/****************************************************************************/

static unsigned long slot_own_flags;

static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr)
{
	local_irq_save(slot_own_flags);
	sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
}

static inline void sh64_teardown_dtlb_cache_slot(void)
{
	sh64_teardown_tlb_slot(dtlb_cache_slot);
	local_irq_restore(slot_own_flags);
}
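
/*
 * Note: these two helpers are always used as a bracketed pair around a run
 * of OCBP/alloco operations through the temporary mapping.  Interrupts stay
 * disabled (with the saved flags kept in slot_own_flags) from setup until
 * the matching teardown, so only one user of the wired slot can be active
 * at a time.
 */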

/****************************************************************************/

#ifndef CONFIG_ICACHE_DISABLED

static void __inline__ sh64_icache_inv_all(void)
{
	unsigned long long addr, flag, data;
	unsigned int flags;

	addr = ICCR0;
	flag = ICCR0_ICI;
	data = 0;

	/* Make this a critical section for safety (probably not strictly necessary.) */
	local_irq_save(flags);

	/* Without %1 it gets inexplicably wrong */
	asm volatile("getcfg %3, 0, %0\n\t"
			"or %0, %2, %0\n\t"
			"putcfg %3, 0, %0\n\t"
			"synci"
			: "=&r" (data)
			: "0" (data), "r" (flag), "r" (addr));

	local_irq_restore(flags);
}

static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
{
	/* Invalidate range of addresses [start,end] from the I-cache, where
	 * the addresses lie in the kernel superpage. */

	unsigned long long ullend, addr, aligned_start;
#if (NEFF == 32)
	/* Sign-extend the 32-bit effective address into its canonical
	   64-bit form. */
	aligned_start = (unsigned long long)(signed long long)(signed long) start;
#else
#error "NEFF != 32"
#endif
	aligned_start &= L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
#if (NEFF == 32)
	ullend = (unsigned long long) (signed long long) (signed long) end;
#else
#error "NEFF != 32"
#endif
	while (addr <= ullend) {
		asm __volatile__ ("icbi %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
}

static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
{
	/* If we get called, we know that vma->vm_flags contains VM_EXEC.
	   Also, eaddr is page-aligned. */

	unsigned long long addr, end_addr;
	unsigned long flags = 0;
	unsigned long running_asid, vma_asid;
	addr = eaddr;
	end_addr = addr + PAGE_SIZE;

	/* Check whether we can use the current ASID for the I-cache
	   invalidation.  For example, if we're called via
	   access_process_vm->flush_cache_page->here, (e.g. when reading from
	   /proc), 'running_asid' will be that of the reader, not of the
	   victim.

	   Also, note the risk that we might get pre-empted between the ASID
	   compare and blocking IRQs, and before we regain control, the
	   pid->ASID mapping changes.  However, the whole cache will get
	   invalidated when the mapping is renewed, so the worst that can
	   happen is that the loop below ends up invalidating somebody else's
	   cache entries.
	*/

	running_asid = get_asid();
	vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK);
	if (running_asid != vma_asid) {
		local_irq_save(flags);
		switch_and_save_asid(vma_asid);
	}
	while (addr < end_addr) {
		/* Worth unrolling a little */
		asm __volatile__("icbi %0, 0" : : "r" (addr));
		asm __volatile__("icbi %0, 32" : : "r" (addr));
		asm __volatile__("icbi %0, 64" : : "r" (addr));
		asm __volatile__("icbi %0, 96" : : "r" (addr));
		addr += 128;
	}
	if (running_asid != vma_asid) {
		switch_and_save_asid(running_asid);
		local_irq_restore(flags);
	}
}

/****************************************************************************/

static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
			  unsigned long start, unsigned long end)
{
	/* Used for invalidating big chunks of I-cache, i.e. assume the range
	   is whole pages.  If 'start' or 'end' is not page aligned, the code
	   is conservative and invalidates to the ends of the enclosing pages.
	   This is functionally OK, just a performance loss. */

	/* See the comments below in sh64_dcache_purge_user_range() regarding
	   the choice of algorithm.  However, for the I-cache option (2) isn't
	   available because there are no physical tags so aliases can't be
	   resolved.  The icbi instruction has to be used through the user
	   mapping.  Because icbi is cheaper than ocbp on a cache hit, the
	   selective code remains worthwhile for a larger range than it would
	   be for the D-cache.  Just assume 64 pages for now as a working
	   figure.
	   */

	int n_pages;

	if (!mm) return;

	n_pages = ((end - start) >> PAGE_SHIFT);
	if (n_pages >= 64) {
		sh64_icache_inv_all();
	} else {
		unsigned long aligned_start;
		unsigned long eaddr;
		unsigned long after_last_page_start;
		unsigned long mm_asid, current_asid;
		unsigned long long flags = 0ULL;

		mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
		current_asid = get_asid();

		if (mm_asid != current_asid) {
			/* Switch ASID and run the invalidate loop under cli */
			local_irq_save(flags);
			switch_and_save_asid(mm_asid);
		}

		aligned_start = start & PAGE_MASK;
		after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);

		while (aligned_start < after_last_page_start) {
			struct vm_area_struct *vma;
			unsigned long vma_end;
			vma = find_vma(mm, aligned_start);
			if (!vma || (aligned_start < vma->vm_start)) {
				/* Avoid getting stuck in an error condition */
				aligned_start += PAGE_SIZE;
				continue;
			}
			vma_end = vma->vm_end;
			if (vma->vm_flags & VM_EXEC) {
				/* Executable */
				eaddr = aligned_start;
				while (eaddr < vma_end) {
					sh64_icache_inv_user_page(vma, eaddr);
					eaddr += PAGE_SIZE;
				}
			}
			aligned_start = vma->vm_end; /* Skip to start of next region */
		}
		if (mm_asid != current_asid) {
			switch_and_save_asid(current_asid);
			local_irq_restore(flags);
		}
	}
}

static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
						unsigned long start, int len)
{

	/* Invalidate a small range of user context I-cache, not necessarily
	   page (or even cache-line) aligned. */

	unsigned long long eaddr = start;
	unsigned long long eaddr_end = start + len;
	unsigned long current_asid, mm_asid;
	unsigned long long flags;
	unsigned long long epage_start;

	/* Since this is used inside ptrace, the ASID in the mm context
	   typically won't match current_asid.  We'll have to switch ASID to do
	   this.  For safety, and given that the range will be small, do all
	   this under cli.

	   Note, there is a hazard that the ASID in mm->context is no longer
	   actually associated with mm, i.e. if the mm->context has started a
	   new cycle since mm was last active.  However, this is just a
	   performance issue: all that happens is that we invalidate lines
	   belonging to another mm, so the owning process has to refill them
	   when that mm goes live again.  mm itself can't have any cache
	   entries because there will have been a flush_cache_all when the new
	   mm->context cycle started. */

	/* Align to start of cache line.  Otherwise, suppose len==8 and start
	   was at 32N+28 : the last 4 bytes wouldn't get invalidated. */
	eaddr = start & L1_CACHE_ALIGN_MASK;
	eaddr_end = start + len;

	local_irq_save(flags);
	mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
	current_asid = switch_and_save_asid(mm_asid);

	epage_start = eaddr & PAGE_MASK;

	while (eaddr < eaddr_end)
	{
		asm __volatile__("icbi %0, 0" : : "r" (eaddr));
		eaddr += L1_CACHE_BYTES;
	}
	switch_and_save_asid(current_asid);
	local_irq_restore(flags);
}

static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
{
	/* The icbi instruction never raises ITLBMISS.  i.e. if there's not a
	   cache hit on the virtual tag the instruction ends there, without a
	   TLB lookup. */

	unsigned long long aligned_start;
	unsigned long long ull_end;
	unsigned long long addr;

	ull_end = end;

	/* Just invalidate over the range using the natural addresses.  TLB
	   miss handling will be OK (TBC).  Since it's for the current process,
	   either we're already in the right ASID context, or the ASIDs have
	   been recycled since we were last active in which case we might just
	   invalidate another process's I-cache entries : no worries, just a
	   performance drop for him. */
	aligned_start = start & L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
	while (addr < ull_end) {
		asm __volatile__ ("icbi %0, 0" : : "r" (addr));
		asm __volatile__ ("nop");
		asm __volatile__ ("nop");
		addr += L1_CACHE_BYTES;
	}
}

#endif /* !CONFIG_ICACHE_DISABLED */

/****************************************************************************/

#ifndef CONFIG_DCACHE_DISABLED

/* Buffer used as the target of alloco instructions to purge data from cache
   sets by natural eviction. -- RPC */
#define DUMMY_ALLOCO_AREA_SIZE ((L1_CACHE_SIZE_BYTES) + (1024 * 4))
static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };

/****************************************************************************/

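/*
 * Illustration of the addressing below, using the geometry set up in
 * sh64_cache_init() (256 sets, 4 ways, way_ofs = 8KB, entry_shift = 5):
 * for each target set we compute the dummy-buffer address that indexes that
 * set (set_offset << 5 bytes into the buffer, with set_offset taken modulo
 * the number of sets) and then issue one alloco per way at 8KB strides from
 * it, so every resident line of that set gets evicted.
 */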
static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
{
	/* Purge all ways in a particular block of sets, specified by the base
	   set number and number of sets.  Can handle wrap-around, if that's
	   needed. */

	int dummy_buffer_base_set;
	unsigned long long eaddr, eaddr0, eaddr1;
	int j;
	int set_offset;

	dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift;
	set_offset = sets_to_purge_base - dummy_buffer_base_set;

	for (j=0; j<n_sets; j++, set_offset++) {
		set_offset &= (cpu_data->dcache.sets - 1);
		eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift);

		/* Do one alloco which hits the required set per cache way.  For
		   write-back mode, this will purge the #ways resident lines.  There's
		   little point unrolling this loop because the allocos stall more if
		   they're too close together. */
		eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
		for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
			asm __volatile__ ("alloco %0, 0" : : "r" (eaddr));
			asm __volatile__ ("synco"); /* TAKum03020 */
		}

		eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
		for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
			/* Load from each address.  Required because alloco is a NOP if
			   the cache is write-through.  Write-through is a config option. */
			if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
				*(volatile unsigned char *)(int)eaddr;
		}
	}

	/* Don't use OCBI to invalidate the lines.  That costs cycles directly.
	   If the dummy block is just left resident, it will naturally get
	   evicted as required.  */

	return;
}

/****************************************************************************/

static void sh64_dcache_purge_all(void)
{
	/* Purge the entire contents of the dcache.  The most efficient way to
	   achieve this is to use alloco instructions on a region of unused
	   memory equal in size to the cache, thereby causing the current
	   contents to be discarded by natural eviction.  The alternative,
	   namely reading every tag, setting up a mapping for the corresponding
	   page and doing an OCBP for the line, would be much more expensive.
	   */

	sh64_dcache_purge_sets(0, cpu_data->dcache.sets);

	return;
}

/****************************************************************************/

static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end)
{
	/* Purge the range of addresses [start,end] from the D-cache.  The
	   addresses lie in the superpage mapping.  There's no harm if we
	   overpurge at either end - just a small performance loss. */
	unsigned long long ullend, addr, aligned_start;
#if (NEFF == 32)
	aligned_start = (unsigned long long)(signed long long)(signed long) start;
#else
#error "NEFF != 32"
#endif
	aligned_start &= L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
#if (NEFF == 32)
	ullend = (unsigned long long) (signed long long) (signed long) end;
#else
#error "NEFF != 32"
#endif
	while (addr <= ullend) {
		asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
	return;
}

/* Assumes this address and the (2**n_synbits) pages above it aren't used for
   anything else in the kernel */
#define MAGIC_PAGE0_START 0xffffffffec000000ULL

static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr)
{
	/* Purge the physical page 'paddr' from the cache.  It's known that any
	   cache lines requiring attention have the same page colour as the
	   address 'eaddr'.

	   This relies on the fact that the D-cache matches on physical tags
	   when no virtual tag matches.  So we create an alias for the original
	   page and purge through that.  (Alternatively, we could have done
	   this by switching ASID to match the original mapping and purged
	   through that, but that involves ASID switching cost + probably a
	   TLBMISS + refill anyway.)
	*/

	unsigned long long magic_page_start;
	unsigned long long magic_eaddr, magic_eaddr_end;

	magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);

	/* As long as the kernel is not pre-emptible, this doesn't need to be
	   under cli/sti. */

	sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);

	magic_eaddr = magic_page_start;
	magic_eaddr_end = magic_eaddr + PAGE_SIZE;
	while (magic_eaddr < magic_eaddr_end) {
		/* Little point in unrolling this loop - the OCBPs are blocking
		   and won't go any quicker (i.e. the loop overhead is parallel
		   to part of the OCBP execution.) */
		asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
		magic_eaddr += L1_CACHE_BYTES;
	}

	sh64_teardown_dtlb_cache_slot();
}

/****************************************************************************/

static void sh64_dcache_purge_phy_page(unsigned long paddr)
{
	/* Purge a page given its physical start address, by creating a
	   temporary 1 page mapping and purging across that.  Even if we know
	   the virtual address (& vma or mm) of the page, the method here is
	   more elegant because it avoids issues of coping with page faults on
	   the purge instructions (i.e. no special-case code required in the
	   critical path in the TLB miss handling). */

	unsigned long long eaddr_start, eaddr, eaddr_end;
	int i;

	/* As long as the kernel is not pre-emptible, this doesn't need to be
	   under cli/sti. */

	eaddr_start = MAGIC_PAGE0_START;
	for (i=0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
		sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);

		eaddr = eaddr_start;
		eaddr_end = eaddr + PAGE_SIZE;
		while (eaddr < eaddr_end) {
			asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
			eaddr += L1_CACHE_BYTES;
		}

		sh64_teardown_dtlb_cache_slot();
		eaddr_start += PAGE_SIZE;
	}
}

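/* Walk the page tables of 'mm' over [addr,end) (a range covered by a single
   page table page, per the caller) and, under the PTE lock, purge each
   present page through sh64_dcache_purge_coloured_phy_page(), using the user
   address as the colour. */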
static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
				unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	pte_t entry;
	spinlock_t *ptl;
	unsigned long paddr;

	if (!mm)
		return; /* No way to find physical address of page */

	pgd = pgd_offset(mm, addr);
	if (pgd_bad(*pgd))
		return;

	pmd = pmd_offset(pgd, addr);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	do {
		entry = *pte;
		if (pte_none(entry) || !pte_present(entry))
			continue;
		paddr = pte_val(entry) & PAGE_MASK;
		sh64_dcache_purge_coloured_phy_page(paddr, addr);
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);
}
/****************************************************************************/

static void sh64_dcache_purge_user_range(struct mm_struct *mm,
			  unsigned long start, unsigned long end)
{
	/* There are at least 5 choices for the implementation of this, with
	   pros (+), cons(-), comments(*):

	   1. ocbp each line in the range through the original user's ASID
	      + no lines spuriously evicted
	      - tlbmiss handling (must either handle faults on demand (=> extra
		special-case code in tlbmiss critical path), or map the page in
		advance (=> flush_tlb_range in advance to avoid multiple hits))
	      - ASID switching
	      - expensive for large ranges

	   2. temporarily map each page in the range to a special effective
	      address and ocbp through the temporary mapping; relies on the
	      fact that SH-5 OCB* always do TLB lookup and match on ptags (they
	      never look at the etags)
	      + no spurious evictions
	      - expensive for large ranges
	      * surely cheaper than (1)

	   3. walk all the lines in the cache, check the tags, if a match
	      occurs create a page mapping to ocbp the line through
	      + no spurious evictions
	      - tag inspection overhead
	      - (especially for small ranges)
	      - potential cost of setting up/tearing down page mapping for
		every line that matches the range
	      * cost partly independent of range size

	   4. walk all the lines in the cache, check the tags, if a match
	      occurs use 4 * alloco to purge the line (+3 other probably
	      innocent victims) by natural eviction
	      + no tlb mapping overheads
	      - spurious evictions
	      - tag inspection overhead

	   5. implement like flush_cache_all
	      + no tag inspection overhead
	      - spurious evictions
	      - bad for small ranges

	   (1) can be ruled out as more expensive than (2).  (2) appears best
	   for small ranges.  The choice between (3), (4) and (5) for large
	   ranges and the range size for the large/small boundary need
	   benchmarking to determine.

	   For now use approach (2) for small ranges and (5) for large ones.

	   */

	int n_pages;

	n_pages = ((end - start) >> PAGE_SHIFT);
	if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) {
#if 1
		sh64_dcache_purge_all();
#else
		unsigned long long set, way;
		unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
		for (set = 0; set < cpu_data->dcache.sets; set++) {
			unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift);
			for (way = 0; way < cpu_data->dcache.ways; way++) {
				unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift);
				unsigned long long tag0;
				unsigned long line_valid;

				asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr));
				line_valid = tag0 & SH_CACHE_VALID;
				if (line_valid) {
					unsigned long cache_asid;
					unsigned long epn;

					cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift;
					/* The next line needs some
					   explanation.  The virtual tags
					   encode bits [31:13] of the virtual
					   address, bit [12] of the 'tag' being
					   implied by the cache set index. */
					epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift);

					if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) {
						/* TODO : could optimise this
						   call by batching multiple
						   adjacent sets together. */
						sh64_dcache_purge_sets(set, 1);
						break; /* Don't waste time inspecting other ways for this set */
					}
				}
			}
		}
#endif
	} else {
		/* Small range, covered by a single page table page */
		start &= PAGE_MASK;	/* should already be so */
		end = PAGE_ALIGN(end);	/* should already be so */
		sh64_dcache_purge_user_pages(mm, start, end);
	}
	return;
}

static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end)
{
	unsigned long long aligned_start;
	unsigned long long ull_end;
	unsigned long long addr;

	ull_end = end;

	/* Just wback over the range using the natural addresses.  TLB miss
	   handling will be OK (TBC) : the range has just been written to by
	   the signal frame setup code, so the PTEs must exist.

	   Note, if we have CONFIG_PREEMPT and get preempted inside this loop,
	   it doesn't matter, even if the pid->ASID mapping changes whilst
	   we're away.  In that case the cache will have been flushed when the
	   mapping was renewed.  So the writebacks below will be nugatory (and
	   we'll doubtless have to fault the TLB entry/ies in again with the
	   new ASID), but it's a rare case.
	   */
	aligned_start = start & L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
	while (addr < ull_end) {
		asm __volatile__ ("ocbwb %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
}

/****************************************************************************/

/* These *MUST* lie in an area of virtual address space that's otherwise unused. */
#define UNIQUE_EADDR_START 0xe0000000UL
#define UNIQUE_EADDR_END   0xe8000000UL

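/*
 * The unique addresses are handed out in steps of PAGE_SIZE << CACHE_OC_N_SYNBITS
 * (one full set of page colours per allocation), with the colour bits then
 * overridden by the colour of the user address, so the temporary mapping
 * lands in the same cache sets that the user mapping will eventually use.
 * Once the UNIQUE_EADDR window is exhausted, the D-cache is purged and the
 * pointer wraps back to the start.
 */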
static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr)
{
	/* Given a physical address paddr, and a user virtual address
	   user_eaddr which will eventually be mapped to it, create a one-off
	   kernel-private eaddr mapped to the same paddr.  This is used for
	   creating special destination pages for copy_user_page and
	   clear_user_page */

	static unsigned long current_pointer = UNIQUE_EADDR_START;
	unsigned long coloured_pointer;

	if (current_pointer == UNIQUE_EADDR_END) {
		sh64_dcache_purge_all();
		current_pointer = UNIQUE_EADDR_START;
	}

	coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK);
	sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);

	current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);

	return coloured_pointer;
}

/****************************************************************************/

static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address)
{
	void *coloured_to;

	/* Discard any existing cache entries of the wrong colour.  These are
	   often present if the kernel has recently used the page internally,
	   then given it up, and it has since been allocated to the user. */
	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
	sh64_page_copy(from, coloured_to);

	sh64_teardown_dtlb_cache_slot();
}

static void sh64_clear_user_page_coloured(void *to, unsigned long address)
{
	void *coloured_to;

	/* Discard any existing kernel-originated lines of the wrong colour (as
	   above) */
	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
	sh64_page_clear(coloured_to);

	sh64_teardown_dtlb_cache_slot();
}

#endif /* !CONFIG_DCACHE_DISABLED */

/****************************************************************************/

/*##########################################################################
			    EXTERNALLY CALLABLE API.
  ##########################################################################*/

/* These functions are described in Documentation/cachetlb.txt.
   Each one of these functions varies in behaviour depending on whether the
   I-cache and/or D-cache are configured out.

   Note that the Linux term 'flush' corresponds to what is termed 'purge' in
   the sh/sh64 jargon for the D-cache, i.e. write back dirty data then
   invalidate the cache lines, and 'invalidate' for the I-cache.
   */

#undef FLUSH_TRACE

void flush_cache_all(void)
{
	/* Invalidate the entire contents of both caches, after writing back to
	   memory any dirty data from the D-cache. */
	sh64_dcache_purge_all();
	sh64_icache_inv_all();
}

/****************************************************************************/

void flush_cache_mm(struct mm_struct *mm)
{
	/* Invalidate an entire user-address space from both caches, after
	   writing back dirty data (e.g. for shared mmap etc). */

	/* This could be coded selectively by inspecting all the tags then
	   doing 4*alloco on any set containing a match (as for
	   flush_cache_range), but fork/exit/execve (where this is called from)
	   are expensive anyway. */

	/* Have to do a purge here, despite the comments re I-cache below.
	   There could be odd-coloured dirty data associated with the mm still
	   in the cache - if this gets written out through natural eviction
	   after the kernel has reused the page there will be chaos.
	   */

	sh64_dcache_purge_all();

	/* The mm being torn down won't ever be active again, so any Icache
	   lines tagged with its ASID won't be visible for the rest of the
	   lifetime of this ASID cycle.  Before the ASID gets reused, there
	   will be a flush_cache_all.  Hence we don't need to touch the
	   I-cache.  This is similar to the lack of action needed in
	   flush_tlb_mm - see fault.c. */
}

/****************************************************************************/

void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
		       unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;

	/* Invalidate (from both caches) the range [start,end) of virtual
	   addresses from the user address space specified by mm, after writing
	   back any dirty data.

	   Note, 'end' is 1 byte beyond the end of the range to flush. */

	sh64_dcache_purge_user_range(mm, start, end);
	sh64_icache_inv_user_page_range(mm, start, end);
}

/****************************************************************************/

void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned long pfn)
{
	/* Invalidate any entries in either cache for the vma within the user
	   address space vma->vm_mm for the page starting at virtual address
	   'eaddr'.  This seems to be used primarily in breaking COW.  Note,
	   the I-cache must be searched too in case the page in question is
	   both writable and being executed from (e.g. stack trampolines.)

	   Note, this is called with pte lock held.
	   */

	sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);

	if (vma->vm_flags & VM_EXEC) {
		sh64_icache_inv_user_page(vma, eaddr);
	}
}

/****************************************************************************/

#ifndef CONFIG_DCACHE_DISABLED

void copy_user_page(void *to, void *from, unsigned long address, struct page *page)
{
	/* 'from' and 'to' are kernel virtual addresses (within the superpage
	   mapping of the physical RAM).  'address' is the user virtual address
	   where the copy 'to' will be mapped afterwards.  This allows a custom
	   mapping to be used to ensure that the new copy is placed in the
	   right cache sets for the user to see it without having to bounce it
	   out via memory.  Note however : the call to flush_page_to_ram in
	   (generic)/mm/memory.c:(break_cow) undoes all this good work in that one
	   very important case!

	   TBD : can we guarantee that on every call, any cache entries for
	   'from' are in the same colour sets as 'address' also?  i.e. is this
	   always used just to deal with COW?  (I suspect not). */

	/* There are two possibilities here for when the page 'from' was last accessed:
	   * by the kernel : this is OK, no purge required.
	   * by the/a user (e.g. for break_COW) : need to purge.

	   If the potential user mapping at 'address' is the same colour as
	   'from' there is no need to purge any cache lines from the 'from'
	   page mapped into cache sets of colour 'address'.  (The copy will be
	   accessing the page through 'from').
	   */

	if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) {
		sh64_dcache_purge_coloured_phy_page(__pa(from), address);
	}

	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
		/* No synonym problem on destination */
		sh64_page_copy(from, to);
	} else {
		sh64_copy_user_page_coloured(to, from, address);
	}

	/* Note, don't need to flush 'from' page from the cache again - it's
	   done anyway by the generic code */
}

void clear_user_page(void *to, unsigned long address, struct page *page)
{
	/* 'to' is a kernel virtual address (within the superpage
	   mapping of the physical RAM).  'address' is the user virtual address
	   where the 'to' page will be mapped afterwards.  This allows a custom
	   mapping to be used to ensure that the new copy is placed in the
	   right cache sets for the user to see it without having to bounce it
	   out via memory.
	   */

	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
		/* No synonym problem on destination */
		sh64_page_clear(to);
	} else {
		sh64_clear_user_page_coloured(to, address);
	}
}

#endif /* !CONFIG_DCACHE_DISABLED */

/****************************************************************************/

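/* Write back and invalidate all D-cache lines for the physical page backing
   'page' (see Documentation/cachetlb.txt for when the generic code calls
   this). */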
void flush_dcache_page(struct page *page)
{
	sh64_dcache_purge_phy_page(page_to_phys(page));
	wmb();
}

/****************************************************************************/

void flush_icache_range(unsigned long start, unsigned long end)
{
	/* Flush the range [start,end] of kernel virtual address space from
	   the I-cache.  The corresponding range must be purged from the
	   D-cache also because the SH-5 doesn't have cache snooping between
	   the caches.  The addresses will be visible through the superpage
	   mapping, therefore it's guaranteed that there are no cache entries
	   for the range in cache sets of the wrong colour.

	   Primarily used for cohering the I-cache after a module has
	   been loaded. */

	/* We also make sure to purge the same range from the D-cache since
	   flush_page_to_ram() won't be doing this for us! */

	sh64_dcache_purge_kernel_range(start, end);
	wmb();
	sh64_icache_inv_kernel_range(start, end);
}

/****************************************************************************/

void flush_icache_user_range(struct vm_area_struct *vma,
			struct page *page, unsigned long addr, int len)
{
	/* Flush the range of user (defined by vma->vm_mm) address space
	   starting at 'addr' for 'len' bytes from the cache.  The range does
	   not straddle a page boundary, the unique physical page containing
	   the range is 'page'.  This seems to be used mainly for invalidating
	   an address range following a poke into the program text through the
	   ptrace() call from another process (e.g. for BRK instruction
	   insertion). */

	sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
	mb();

	if (vma->vm_flags & VM_EXEC) {
		sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
	}
}

/*##########################################################################
			ARCH/SH64 PRIVATE CALLABLE API.
  ##########################################################################*/

void flush_cache_sigtramp(unsigned long start, unsigned long end)
{
	/* For the address range [start,end), write back the data from the
	   D-cache and invalidate the corresponding region of the I-cache for
	   the current process.  Used to flush signal trampolines on the stack
	   to make them executable. */

	sh64_dcache_wback_current_user_range(start, end);
	wmb();
	sh64_icache_inv_current_user_range(start, end);
}