/*
 * arch/sh/mm/cache-sh5.c
 *
 * Copyright (C) 2000, 2001 Paolo Alberelli
 * Copyright (C) 2002 Benedict Gaster
 * Copyright (C) 2003 Richard Curnow
 * Copyright (C) 2003 - 2008 Paul Mundt
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <asm/tlb.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>

extern void __weak sh4__flush_region_init(void);

/* Wired TLB entry for the D-cache */
static unsigned long long dtlb_cache_slot;
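/*
 * This slot is claimed in cpu_cache_init() below and then used by
 * sh64_setup_dtlb_cache_slot()/sh64_teardown_dtlb_cache_slot() to map a
 * physical page at an effective address of a chosen cache colour while
 * it is purged or copied through that mapping.
 */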

void __init cpu_cache_init(void)
{
	/* Reserve a slot for dcache colouring in the DTLB */
	dtlb_cache_slot = sh64_get_wired_dtlb_entry();

	sh4__flush_region_init();
}

void __init kmap_coherent_init(void)
{
	/* XXX ... */
}

void *kmap_coherent(struct page *page, unsigned long addr)
{
	/* XXX ... */
	return NULL;
}

void kunmap_coherent(void)
{
}

#ifdef CONFIG_DCACHE_DISABLED
#define sh64_dcache_purge_all()					do { } while (0)
#define sh64_dcache_purge_coloured_phy_page(paddr, eaddr)	do { } while (0)
#define sh64_dcache_purge_user_range(mm, start, end)		do { } while (0)
#define sh64_dcache_purge_phy_page(paddr)			do { } while (0)
#define sh64_dcache_purge_virt_page(mm, eaddr)			do { } while (0)
#endif

/*
 * The following group of functions deal with mapping and unmapping a
 * temporary page into a DTLB slot that has been set aside for exclusive
 * use.
 */
static inline void
sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid,
			   unsigned long paddr)
{
	local_irq_disable();
	sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
}

static inline void sh64_teardown_dtlb_cache_slot(void)
{
	sh64_teardown_tlb_slot(dtlb_cache_slot);
	local_irq_enable();
}
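
/*
 * These helpers are used in pairs: a caller wires the slot to map a page
 * at a chosen cache colour, operates through that mapping (ocbp purges,
 * or the coloured copy/clear further down), and then tears the slot down
 * again.  A sketch of the pattern used by
 * sh64_dcache_purge_coloured_phy_page() below:
 *
 *	sh64_setup_dtlb_cache_slot(coloured_eaddr, get_asid(), paddr);
 *	... ocbp every L1_CACHE_BYTES through coloured_eaddr ...
 *	sh64_teardown_dtlb_cache_slot();
 */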

#ifndef CONFIG_ICACHE_DISABLED
static inline void sh64_icache_inv_all(void)
{
	unsigned long long addr, flag, data;
	unsigned long flags;

	addr = ICCR0;
	flag = ICCR0_ICI;
	data = 0;

	/* Make this a critical section for safety (probably not strictly necessary). */
	local_irq_save(flags);

	/* Without %1 it gets inexplicably wrong */
	__asm__ __volatile__ (
		"getcfg %3, 0, %0\n\t"
		"or %0, %2, %0\n\t"
		"putcfg %3, 0, %0\n\t"
		"synci"
		: "=&r" (data)
		: "0" (data), "r" (flag), "r" (addr));
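
	/*
	 * The read-modify-write above sets the ICCR0_ICI bit in ICCR0 via
	 * getcfg/putcfg; the trailing synci is presumably there to ensure
	 * the invalidation has taken effect before execution continues.
	 */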

	local_irq_restore(flags);
}

static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
{
	/* Invalidate range of addresses [start,end] from the I-cache, where
	   the addresses lie in the kernel superpage. */

	unsigned long long ullend, addr, aligned_start;
	aligned_start = (unsigned long long)(signed long long)(signed long) start;
	addr = L1_CACHE_ALIGN(aligned_start);
	ullend = (unsigned long long) (signed long long) (signed long) end;

	while (addr <= ullend) {
		__asm__ __volatile__ ("icbi %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
}

static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
{
	/* If we get called, we know that vma->vm_flags contains VM_EXEC.
	   Also, eaddr is page-aligned. */
	unsigned int cpu = smp_processor_id();
	unsigned long long addr, end_addr;
	unsigned long flags = 0;
	unsigned long running_asid, vma_asid;
	addr = eaddr;
	end_addr = addr + PAGE_SIZE;

	/* Check whether we can use the current ASID for the I-cache
	   invalidation.  For example, if we're called via
	   access_process_vm->flush_cache_page->here, (e.g. when reading from
	   /proc), 'running_asid' will be that of the reader, not of the
	   victim.

	   Also, note the risk that we might get pre-empted between the ASID
	   compare and blocking IRQs, and before we regain control, the
	   pid->ASID mapping changes.  However, the whole cache will get
	   invalidated when the mapping is renewed, so the worst that can
	   happen is that the loop below ends up invalidating somebody else's
	   cache entries.
	*/

	running_asid = get_asid();
	vma_asid = cpu_asid(cpu, vma->vm_mm);
	if (running_asid != vma_asid) {
		local_irq_save(flags);
		switch_and_save_asid(vma_asid);
	}
	while (addr < end_addr) {
		/* Worth unrolling a little */
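		/* (four icbi's at offsets 0/32/64/96 with a 128-byte stride,
		    i.e. this unrolling assumes 32-byte I-cache lines) */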
		__asm__ __volatile__("icbi %0, 0" : : "r" (addr));
		__asm__ __volatile__("icbi %0, 32" : : "r" (addr));
		__asm__ __volatile__("icbi %0, 64" : : "r" (addr));
		__asm__ __volatile__("icbi %0, 96" : : "r" (addr));
		addr += 128;
	}
	if (running_asid != vma_asid) {
		switch_and_save_asid(running_asid);
		local_irq_restore(flags);
	}
}

static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
			unsigned long start, unsigned long end)
{
	/* Used for invalidating big chunks of I-cache, i.e. assume the range
	   is whole pages.  If 'start' or 'end' is not page aligned, the code
	   is conservative and invalidates to the ends of the enclosing pages.
	   This is functionally OK, just a performance loss. */

	/* See the comments below in sh64_dcache_purge_user_range() regarding
	   the choice of algorithm.  However, for the I-cache option (2) isn't
	   available because there are no physical tags so aliases can't be
	   resolved.  The icbi instruction has to be used through the user
	   mapping.  Because icbi is cheaper than ocbp on a cache hit, it
	   would be cheaper to use the selective code for a larger range than
	   is possible with the D-cache.  Just assume 64 for now as a working
	   figure.
	 */
	int n_pages;

	if (!mm)
		return;

	n_pages = ((end - start) >> PAGE_SHIFT);
	if (n_pages >= 64) {
		sh64_icache_inv_all();
	} else {
		unsigned long aligned_start;
		unsigned long eaddr;
		unsigned long after_last_page_start;
		unsigned long mm_asid, current_asid;
		unsigned long flags = 0;

		mm_asid = cpu_asid(smp_processor_id(), mm);
		current_asid = get_asid();

		if (mm_asid != current_asid) {
			/* Switch ASID and run the invalidate loop under cli */
			local_irq_save(flags);
			switch_and_save_asid(mm_asid);
		}

		aligned_start = start & PAGE_MASK;
		after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);

		while (aligned_start < after_last_page_start) {
			struct vm_area_struct *vma;
			unsigned long vma_end;
			vma = find_vma(mm, aligned_start);
			if (!vma || (aligned_start <= vma->vm_end)) {
				/* Avoid getting stuck in an error condition */
				aligned_start += PAGE_SIZE;
				continue;
			}
			vma_end = vma->vm_end;
			if (vma->vm_flags & VM_EXEC) {
				/* Executable */
				eaddr = aligned_start;
				while (eaddr < vma_end) {
					sh64_icache_inv_user_page(vma, eaddr);
					eaddr += PAGE_SIZE;
				}
			}
			aligned_start = vma->vm_end; /* Skip to start of next region */
		}

		if (mm_asid != current_asid) {
			switch_and_save_asid(current_asid);
			local_irq_restore(flags);
		}
	}
}

/*
 * Invalidate a small range of user context I-cache, not necessarily page
 * (or even cache-line) aligned.
 *
 * Since this is used inside ptrace, the ASID in the mm context typically
 * won't match current_asid.  We'll have to switch ASID to do this.  For
 * safety, and given that the range will be small, do all this under cli.
 *
 * Note, there is a hazard that the ASID in mm->context is no longer
 * actually associated with mm, i.e. if the mm->context has started a new
 * cycle since mm was last active.  However, this is just a performance
 * issue: all that happens is that we invalidate lines belonging to
 * another mm, so the owning process has to refill them when that mm goes
 * live again.  mm itself can't have any cache entries because there will
 * have been a flush_cache_all when the new mm->context cycle started.
 */
static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
						unsigned long start, int len)
{
	unsigned long long eaddr = start;
	unsigned long long eaddr_end = start + len;
	unsigned long current_asid, mm_asid;
	unsigned long flags;
	unsigned long long epage_start;

	/*
	 * Align to start of cache line.  Otherwise, suppose len==8 and
	 * start was at 32N+28 : the last 4 bytes wouldn't get invalidated.
	 */
	eaddr = L1_CACHE_ALIGN(start);
	eaddr_end = start + len;

	mm_asid = cpu_asid(smp_processor_id(), mm);
	local_irq_save(flags);
	current_asid = switch_and_save_asid(mm_asid);

	epage_start = eaddr & PAGE_MASK;

	while (eaddr < eaddr_end) {
		__asm__ __volatile__("icbi %0, 0" : : "r" (eaddr));
		eaddr += L1_CACHE_BYTES;
	}
	switch_and_save_asid(current_asid);
	local_irq_restore(flags);
}

static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
{
	/* The icbi instruction never raises ITLBMISS.  i.e. if there's not a
	   cache hit on the virtual tag the instruction ends there, without a
	   TLB lookup. */

	unsigned long long aligned_start;
	unsigned long long ull_end;
	unsigned long long addr;

	ull_end = end;

	/* Just invalidate over the range using the natural addresses.  TLB
	   miss handling will be OK (TBC).  Since it's for the current process,
	   either we're already in the right ASID context, or the ASIDs have
	   been recycled since we were last active in which case we might just
	   invalidate another process's I-cache entries: no worries, just a
	   performance drop for that process. */
	aligned_start = L1_CACHE_ALIGN(start);
	addr = aligned_start;
	while (addr < ull_end) {
		__asm__ __volatile__ ("icbi %0, 0" : : "r" (addr));
		__asm__ __volatile__ ("nop");
		__asm__ __volatile__ ("nop");
		addr += L1_CACHE_BYTES;
	}
}
#endif /* !CONFIG_ICACHE_DISABLED */

#ifndef CONFIG_DCACHE_DISABLED
/* Buffer used as the target of alloco instructions to purge data from cache
   sets by natural eviction. -- RPC */
#define DUMMY_ALLOCO_AREA_SIZE ((L1_CACHE_BYTES << 10) + (1024 * 4))
static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };
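/*
 * (With 32-byte cache lines this works out to 32 KiB plus a 4 KiB slack
 *  area -- presumably sized so that the alloco walk in
 *  sh64_dcache_purge_sets() below can displace every line of the D-cache
 *  by natural eviction.)
 */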

static inline void sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
{
	/* Purge all ways in a particular block of sets, specified by the base
	   set number and number of sets.  Can handle wrap-around, if that's
	   needed. */

	int dummy_buffer_base_set;
	unsigned long long eaddr, eaddr0, eaddr1;
	int j;
	int set_offset;

	dummy_buffer_base_set = ((int)&dummy_alloco_area &
				 cpu_data->dcache.entry_mask) >>
				 cpu_data->dcache.entry_shift;
	set_offset = sets_to_purge_base - dummy_buffer_base_set;

	for (j = 0; j < n_sets; j++, set_offset++) {
		set_offset &= (cpu_data->dcache.sets - 1);
		eaddr0 = (unsigned long long)dummy_alloco_area +
			(set_offset << cpu_data->dcache.entry_shift);

		/*
		 * Do one alloco which hits the required set per cache
		 * way.  For write-back mode, this will purge the #ways
		 * resident lines.  There's little point unrolling this
		 * loop because the allocos stall more if they're too
		 * close together.
		 */
		eaddr1 = eaddr0 + cpu_data->dcache.way_size *
			cpu_data->dcache.ways;

		for (eaddr = eaddr0; eaddr < eaddr1;
		     eaddr += cpu_data->dcache.way_size) {
			__asm__ __volatile__ ("alloco %0, 0" : : "r" (eaddr));
			__asm__ __volatile__ ("synco");	/* TAKum03020 */
		}

		eaddr1 = eaddr0 + cpu_data->dcache.way_size *
			cpu_data->dcache.ways;

		for (eaddr = eaddr0; eaddr < eaddr1;
		     eaddr += cpu_data->dcache.way_size) {
			/*
			 * Load from each address.  Required because
			 * alloco is a NOP if the cache is write-through.
			 */
			if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
				__raw_readb((unsigned long)eaddr);
		}
	}

	/*
	 * Don't use OCBI to invalidate the lines.  That costs cycles
	 * directly.  If the dummy block is just left resident, it will
	 * naturally get evicted as required.
	 */
}
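
/*
 * For illustration of the above: assuming, say, a 4-way D-cache with
 * 32-byte lines and an 8 KiB way_size, purging set S touches
 * dummy_alloco_area at offsets (S << entry_shift), +8 KiB, +16 KiB and
 * +24 KiB -- one alloco per way, all of which index the same set.
 */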

/*
 * Purge the entire contents of the dcache.  The most efficient way to
 * achieve this is to use alloco instructions on a region of unused
 * memory equal in size to the cache, thereby causing the current
 * contents to be discarded by natural eviction.  The alternative, namely
 * reading every tag, setting up a mapping for the corresponding page and
 * doing an OCBP for the line, would be much more expensive.
 */
static void sh64_dcache_purge_all(void)
{
	sh64_dcache_purge_sets(0, cpu_data->dcache.sets);
}

/* Assumes this address and the (2**n_synbits) pages above it aren't used
   for anything else in the kernel */
#define MAGIC_PAGE0_START 0xffffffffec000000ULL

/* Purge the physical page 'paddr' from the cache.  It's known that any
 * cache lines requiring attention have the same page colour as the
 * address 'eaddr'.
 *
 * This relies on the fact that the D-cache matches on physical tags when
 * no virtual tag matches.  So we create an alias for the original page
 * and purge through that.  (Alternatively, we could have done this by
 * switching ASID to match the original mapping and purged through that,
 * but that involves ASID switching cost + probably a TLBMISS + refill
 * anyway.)
 */
static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr,
						unsigned long eaddr)
{
	unsigned long long magic_page_start;
	unsigned long long magic_eaddr, magic_eaddr_end;

	magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);

	/* As long as the kernel is not pre-emptible, this doesn't need to be
	   under cli/sti. */
	sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);

	magic_eaddr = magic_page_start;
	magic_eaddr_end = magic_eaddr + PAGE_SIZE;

	while (magic_eaddr < magic_eaddr_end) {
		/* Little point in unrolling this loop - the OCBPs are blocking
		   and won't go any quicker (i.e. the loop overhead is parallel
		   to part of the OCBP execution.) */
		__asm__ __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
		magic_eaddr += L1_CACHE_BYTES;
	}

	sh64_teardown_dtlb_cache_slot();
}

/*
 * Purge a page given its physical start address, by creating a temporary
 * 1 page mapping and purging across that.  Even if we know the virtual
 * address (& vma or mm) of the page, the method here is more elegant
 * because it avoids issues of coping with page faults on the purge
 * instructions (i.e. no special-case code required in the critical path
 * in the TLB miss handling).
 */
static void sh64_dcache_purge_phy_page(unsigned long paddr)
{
	unsigned long long eaddr_start, eaddr, eaddr_end;
	int i;

	/* As long as the kernel is not pre-emptible, this doesn't need to be
	   under cli/sti. */
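	/*
	 * The previous virtual colour of the page isn't known here (unlike
	 * in sh64_dcache_purge_coloured_phy_page() above), so purge the page
	 * through each of the (1 << CACHE_OC_N_SYNBITS) possible synonym
	 * colours of the magic page in turn.
	 */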
	eaddr_start = MAGIC_PAGE0_START;
	for (i = 0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
		sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);

		eaddr = eaddr_start;
		eaddr_end = eaddr + PAGE_SIZE;
		while (eaddr < eaddr_end) {
			__asm__ __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
			eaddr += L1_CACHE_BYTES;
		}

		sh64_teardown_dtlb_cache_slot();
		eaddr_start += PAGE_SIZE;
	}
}

static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
				unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	pte_t entry;
	spinlock_t *ptl;
	unsigned long paddr;

	if (!mm)
		return; /* No way to find physical address of page */

	pgd = pgd_offset(mm, addr);
	if (pgd_bad(*pgd))
		return;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud) || pud_bad(*pud))
		return;

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return;

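	/*
	 * A single pte_offset_map_lock() is sufficient here because the
	 * caller (sh64_dcache_purge_user_range() below) only passes ranges
	 * that lie within one PMD, i.e. one page-table page.
	 */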
	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	do {
		entry = *pte;
		if (pte_none(entry) || !pte_present(entry))
			continue;
		paddr = pte_val(entry) & PAGE_MASK;
		sh64_dcache_purge_coloured_phy_page(paddr, addr);
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);
}

/*
 * There are at least 5 choices for the implementation of this, with
 * pros (+), cons(-), comments(*):
 *
 * 1. ocbp each line in the range through the original user's ASID
 *    + no lines spuriously evicted
 *    - tlbmiss handling (must either handle faults on demand => extra
 *	special-case code in tlbmiss critical path), or map the page in
 *	advance (=> flush_tlb_range in advance to avoid multiple hits)
 *    - ASID switching
 *    - expensive for large ranges
 *
 * 2. temporarily map each page in the range to a special effective
 *    address and ocbp through the temporary mapping; relies on the
 *    fact that SH-5 OCB* always do TLB lookup and match on ptags (they
 *    never look at the etags)
 *    + no spurious evictions
 *    - expensive for large ranges
 *    * surely cheaper than (1)
 *
 * 3. walk all the lines in the cache, check the tags, if a match
 *    occurs create a page mapping to ocbp the line through
 *    + no spurious evictions
 *    - tag inspection overhead
 *    - (especially for small ranges)
 *    - potential cost of setting up/tearing down page mapping for
 *      every line that matches the range
 *    * cost partly independent of range size
 *
 * 4. walk all the lines in the cache, check the tags, if a match
 *    occurs use 4 * alloco to purge the line (+3 other probably
 *    innocent victims) by natural eviction
 *    + no tlb mapping overheads
 *    - spurious evictions
 *    - tag inspection overhead
 *
 * 5. implement like flush_cache_all
 *    + no tag inspection overhead
 *    - spurious evictions
 *    - bad for small ranges
 *
 * (1) can be ruled out as more expensive than (2).  (2) appears best
 * for small ranges.  The choice between (3), (4) and (5) for large
 * ranges and the range size for the large/small boundary need
 * benchmarking to determine.
 *
 * For now use approach (2) for small ranges and (5) for large ones.
 */
static void sh64_dcache_purge_user_range(struct mm_struct *mm,
				unsigned long start, unsigned long end)
{
	int n_pages = ((end - start) >> PAGE_SHIFT);

	if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) {
		sh64_dcache_purge_all();
	} else {
		/* Small range, covered by a single page table page */
		start &= PAGE_MASK;	/* should already be so */
		end = PAGE_ALIGN(end);	/* should already be so */
		sh64_dcache_purge_user_pages(mm, start, end);
	}
}
#endif /* !CONFIG_DCACHE_DISABLED */

/*
 * Invalidate the entire contents of both caches, after writing back to
 * memory any dirty data from the D-cache.
 */
void flush_cache_all(void)
{
	sh64_dcache_purge_all();
	sh64_icache_inv_all();
}

/*
 * Invalidate an entire user-address space from both caches, after
 * writing back dirty data (e.g. for shared mmap etc).
 *
 * This could be coded selectively by inspecting all the tags then
 * doing 4*alloco on any set containing a match (as for
 * flush_cache_range), but fork/exit/execve (where this is called from)
 * are expensive anyway.
 *
 * Have to do a purge here, despite the comments re I-cache below.
 * There could be odd-coloured dirty data associated with the mm still
 * in the cache - if this gets written out through natural eviction
 * after the kernel has reused the page there will be chaos.
 *
 * The mm being torn down won't ever be active again, so any I-cache
 * lines tagged with its ASID won't be visible for the rest of the
 * lifetime of this ASID cycle.  Before the ASID gets reused, there
 * will be a flush_cache_all.  Hence we don't need to touch the
 * I-cache.  This is similar to the lack of action needed in
 * flush_tlb_mm - see fault.c.
 */
void flush_cache_mm(struct mm_struct *mm)
{
	sh64_dcache_purge_all();
}

/*
 * Invalidate (from both caches) the range [start,end) of virtual
 * addresses from the user address space specified by mm, after writing
 * back any dirty data.
 *
 * Note, 'end' is 1 byte beyond the end of the range to flush.
 */
void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
		       unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;

	sh64_dcache_purge_user_range(mm, start, end);
	sh64_icache_inv_user_page_range(mm, start, end);
}

/*
 * Invalidate any entries in either cache for the vma within the user
 * address space vma->vm_mm for the page starting at virtual address
 * 'eaddr'.  This seems to be used primarily in breaking COW.  Note,
 * the I-cache must be searched too in case the page in question is
 * both writable and being executed from (e.g. stack trampolines.)
 *
 * Note, this is called with pte lock held.
 */
void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr,
		      unsigned long pfn)
{
	sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);

	if (vma->vm_flags & VM_EXEC)
		sh64_icache_inv_user_page(vma, eaddr);
}

void flush_dcache_page(struct page *page)
{
	sh64_dcache_purge_phy_page(page_to_phys(page));
	wmb();
}

/*
 * Flush the range [start,end] of kernel virtual address space from
 * the I-cache.  The corresponding range must be purged from the
 * D-cache also because the SH-5 doesn't have cache snooping between
 * the caches.  The addresses will be visible through the superpage
 * mapping, therefore it's guaranteed that there are no cache entries
 * for the range in cache sets of the wrong colour.
 */
void flush_icache_range(unsigned long start, unsigned long end)
{
	__flush_purge_region((void *)start, end);
	wmb();
	sh64_icache_inv_kernel_range(start, end);
}

/*
 * Flush the range of user (defined by vma->vm_mm) address space starting
 * at 'addr' for 'len' bytes from the cache.  The range does not straddle
 * a page boundary; the unique physical page containing the range is
 * 'page'.  This seems to be used mainly for invalidating an address
 * range following a poke into the program text through the ptrace() call
 * from another process (e.g. for BRK instruction insertion).
 */
static void flush_icache_user_range(struct vm_area_struct *vma,
			struct page *page, unsigned long addr, int len)
{
	sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
	mb();

	if (vma->vm_flags & VM_EXEC)
		sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
}

/*
 * For the cache line(s) covering 'vaddr', write back the data from the
 * D-cache and invalidate the corresponding range of the I-cache for the
 * current process.  Used to flush signal trampolines on the stack to
 * make them executable.
 */
void flush_cache_sigtramp(unsigned long vaddr)
{
	unsigned long end = vaddr + L1_CACHE_BYTES;

	__flush_wback_region((void *)vaddr, L1_CACHE_BYTES);
	wmb();
	sh64_icache_inv_current_user_range(vaddr, end);
}

#ifdef CONFIG_MMU
/*
 * These *MUST* lie in an area of virtual address space that's otherwise
 * unused.
 */
#define UNIQUE_EADDR_START 0xe0000000UL
#define UNIQUE_EADDR_END 0xe8000000UL

/*
 * Given a physical address paddr, and a user virtual address user_eaddr
 * which will eventually be mapped to it, create a one-off kernel-private
 * eaddr mapped to the same paddr.  This is used for creating special
 * destination pages for copy_user_page and clear_user_page.
 */
static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr,
					    unsigned long paddr)
{
	static unsigned long current_pointer = UNIQUE_EADDR_START;
	unsigned long coloured_pointer;

	if (current_pointer == UNIQUE_EADDR_END) {
		sh64_dcache_purge_all();
		current_pointer = UNIQUE_EADDR_START;
	}

	coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) |
				(user_eaddr & CACHE_OC_SYN_MASK);
	sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);
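	/* Note: the wired slot set up here is left in place; the callers
	   (sh64_copy_user_page_coloured()/sh64_clear_user_page_coloured())
	   tear it down with sh64_teardown_dtlb_cache_slot() once they have
	   finished with the coloured mapping. */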

	current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);

	return coloured_pointer;
}

static void sh64_copy_user_page_coloured(void *to, void *from,
					 unsigned long address)
{
	void *coloured_to;

	/*
	 * Discard any existing cache entries of the wrong colour.  These are
	 * present quite often, if the kernel has recently used the page
	 * internally, then given it up, then it's been allocated to the user.
	 */
	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long)to);

	coloured_to = (void *)sh64_make_unique_eaddr(address, __pa(to));
	copy_page(from, coloured_to);

	sh64_teardown_dtlb_cache_slot();
}

static void sh64_clear_user_page_coloured(void *to, unsigned long address)
{
	void *coloured_to;

	/*
	 * Discard any existing kernel-originated lines of the wrong
	 * colour (as above)
	 */
	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long)to);

	coloured_to = (void *)sh64_make_unique_eaddr(address, __pa(to));
	clear_page(coloured_to);

	sh64_teardown_dtlb_cache_slot();
}

/*
 * 'from' and 'to' are kernel virtual addresses (within the superpage
 * mapping of the physical RAM).  'address' is the user virtual address
 * where the copy 'to' will be mapped afterwards.  This allows a custom
 * mapping to be used to ensure that the new copy is placed in the
 * right cache sets for the user to see it without having to bounce it
 * out via memory.  Note however: the call to flush_page_to_ram in
 * (generic)/mm/memory.c:(break_cow) undoes all this good work in that one
 * very important case!
 *
 * TBD: can we guarantee that on every call, any cache entries for
 * 'from' are in the same colour sets as 'address' also?  i.e. is this
 * always used just to deal with COW?  (I suspect not).
 *
 * There are two possibilities here for when the page 'from' was last accessed:
 * - by the kernel : this is OK, no purge required.
 * - by the/a user (e.g. for break_COW) : need to purge.
 *
 * If the potential user mapping at 'address' is the same colour as
 * 'from' there is no need to purge any cache lines from the 'from'
 * page mapped into cache sets of colour 'address'.  (The copy will be
 * accessing the page through 'from').
 */
void copy_user_page(void *to, void *from, unsigned long address,
		    struct page *page)
{
	if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0)
		sh64_dcache_purge_coloured_phy_page(__pa(from), address);

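	/*
	 * If the kernel address 'to' already has the same cache colour
	 * (operand-cache synonym bits) as the user address it will be
	 * mapped at, a plain copy_page() lands the data in the right sets;
	 * otherwise copy through a specially coloured alias.
	 */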
	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0)
		copy_page(to, from);
	else
		sh64_copy_user_page_coloured(to, from, address);
}

/*
 * 'to' is a kernel virtual address (within the superpage mapping of the
 * physical RAM).  'address' is the user virtual address where the 'to'
 * page will be mapped afterwards.  This allows a custom mapping to be
 * used to ensure that the new copy is placed in the right cache sets for
 * the user to see it without having to bounce it out via memory.
 */
void clear_user_page(void *to, unsigned long address, struct page *page)
{
	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0)
		clear_page(to);
	else
		sh64_clear_user_page_coloured(to, address);
}
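
/*
 * copy_to_user_page()/copy_from_user_page() are what the generic code
 * (e.g. access_process_vm() for ptrace) uses when it reads or modifies
 * another process's pages through a kernel mapping: flush the page first
 * and, for writes, fix up the I-cache afterwards so the target sees
 * coherent data.
 */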
void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
		       unsigned long vaddr, void *dst, const void *src,
		       unsigned long len)
{
	flush_cache_page(vma, vaddr, page_to_pfn(page));
	memcpy(dst, src, len);
	flush_icache_user_range(vma, page, vaddr, len);
}

void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
			 unsigned long vaddr, void *dst, const void *src,
			 unsigned long len)
{
	flush_cache_page(vma, vaddr, page_to_pfn(page));
	memcpy(dst, src, len);
}
#endif