Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
| 2 | * This file is subject to the terms and conditions of the GNU General Public |
| 3 | * License. See the file "COPYING" in the main directory of this archive |
| 4 | * for more details. |
| 5 | * |
| 6 | * arch/sh64/mm/cache.c |
| 7 | * |
| 8 | * Original version Copyright (C) 2000, 2001 Paolo Alberelli |
| 9 | * Second version Copyright (C) benedict.gaster@superh.com 2002 |
| 10 | * Third version Copyright Richard.Curnow@superh.com 2003 |
| 11 | * Hacks to third version Copyright (C) 2003 Paul Mundt |
| 12 | */ |
| 13 | |
| 14 | /****************************************************************************/ |
| 15 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 16 | #include <linux/init.h> |
| 17 | #include <linux/mman.h> |
| 18 | #include <linux/mm.h> |
| 19 | #include <linux/threads.h> |
| 20 | #include <asm/page.h> |
| 21 | #include <asm/pgtable.h> |
| 22 | #include <asm/processor.h> |
| 23 | #include <asm/cache.h> |
| 24 | #include <asm/tlb.h> |
| 25 | #include <asm/io.h> |
| 26 | #include <asm/uaccess.h> |
| 27 | #include <asm/mmu_context.h> |
| 28 | #include <asm/pgalloc.h> /* for flush_itlb_range */ |
| 29 | |
| 30 | #include <linux/proc_fs.h> |
| 31 | |
/* Implemented in entry.S.  Callers in this file treat the return value as
   the ASID that was live before the switch to new_asid. */
extern unsigned long switch_and_save_asid(unsigned long new_asid);

/* Wired DTLB entry reserved for the D-cache maintenance code below; it is
   obtained from sh64_get_wired_dtlb_entry() in sh64_cache_init(). */
static unsigned long long dtlb_cache_slot;
| 37 | |
| 38 | /** |
| 39 | * sh64_cache_init() |
| 40 | * |
| 41 | * This is pretty much just a straightforward clone of the SH |
| 42 | * detect_cpu_and_cache_system(). |
| 43 | * |
| 44 | * This function is responsible for setting up all of the cache |
| 45 | * info dynamically as well as taking care of CPU probing and |
| 46 | * setting up the relevant subtype data. |
| 47 | * |
| 48 | * FIXME: For the time being, we only really support the SH5-101 |
| 49 | * out of the box, and don't support dynamic probing for things |
| 50 | * like the SH5-103 or even cut2 of the SH5-101. Implement this |
| 51 | * later! |
| 52 | */ |
| 53 | int __init sh64_cache_init(void) |
| 54 | { |
| 55 | /* |
| 56 | * First, setup some sane values for the I-cache. |
| 57 | */ |
| 58 | cpu_data->icache.ways = 4; |
| 59 | cpu_data->icache.sets = 256; |
| 60 | cpu_data->icache.linesz = L1_CACHE_BYTES; |
| 61 | |
| 62 | /* |
| 63 | * FIXME: This can probably be cleaned up a bit as well.. for example, |
| 64 | * do we really need the way shift _and_ the way_step_shift ?? Judging |
| 65 | * by the existing code, I would guess no.. is there any valid reason |
| 66 | * why we need to be tracking this around? |
| 67 | */ |
| 68 | cpu_data->icache.way_shift = 13; |
| 69 | cpu_data->icache.entry_shift = 5; |
| 70 | cpu_data->icache.set_shift = 4; |
| 71 | cpu_data->icache.way_step_shift = 16; |
| 72 | cpu_data->icache.asid_shift = 2; |
| 73 | |
| 74 | /* |
| 75 | * way offset = cache size / associativity, so just don't factor in |
| 76 | * associativity in the first place.. |
| 77 | */ |
| 78 | cpu_data->icache.way_ofs = cpu_data->icache.sets * |
| 79 | cpu_data->icache.linesz; |
| 80 | |
| 81 | cpu_data->icache.asid_mask = 0x3fc; |
| 82 | cpu_data->icache.idx_mask = 0x1fe0; |
| 83 | cpu_data->icache.epn_mask = 0xffffe000; |
| 84 | cpu_data->icache.flags = 0; |
| 85 | |
| 86 | /* |
| 87 | * Next, setup some sane values for the D-cache. |
| 88 | * |
| 89 | * On the SH5, these are pretty consistent with the I-cache settings, |
| 90 | * so we just copy over the existing definitions.. these can be fixed |
| 91 | * up later, especially if we add runtime CPU probing. |
| 92 | * |
| 93 | * Though in the meantime it saves us from having to duplicate all of |
| 94 | * the above definitions.. |
| 95 | */ |
| 96 | cpu_data->dcache = cpu_data->icache; |
| 97 | |
| 98 | /* |
| 99 | * Setup any cache-related flags here |
| 100 | */ |
| 101 | #if defined(CONFIG_DCACHE_WRITE_THROUGH) |
| 102 | set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)); |
| 103 | #elif defined(CONFIG_DCACHE_WRITE_BACK) |
| 104 | set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags)); |
| 105 | #endif |
| 106 | |
| 107 | /* |
| 108 | * We also need to reserve a slot for the D-cache in the DTLB, so we |
| 109 | * do this now .. |
| 110 | */ |
| 111 | dtlb_cache_slot = sh64_get_wired_dtlb_entry(); |
| 112 | |
| 113 | return 0; |
| 114 | } |
| 115 | |
/* With the D-cache configured out, every D-cache maintenance primitive
   used in this file expands to an empty statement. */
#ifdef CONFIG_DCACHE_DISABLED
#define sh64_dcache_purge_all()					do { } while (0)
#define sh64_dcache_purge_kernel_range(start, end)		do { } while (0)
#define sh64_dcache_purge_user_range(mm, start, end)		do { } while (0)
#define sh64_dcache_purge_virt_page(mm, eaddr)			do { } while (0)
#define sh64_dcache_purge_phy_page(paddr)			do { } while (0)
#define sh64_dcache_purge_coloured_phy_page(paddr, eaddr)	do { } while (0)
#define sh64_dcache_wback_current_user_range(start, end)	do { } while (0)
#endif
| 125 | |
| 126 | /*##########################################################################*/ |
| 127 | |
/* From here onwards, a rewrite of the implementation,
   by Richard.Curnow@superh.com.

   The major changes in this compared to the old version are:
   1. Use more selective purging through OCBP instead of using ALLOCO to purge
      by natural replacement.  This avoids purging out unrelated cache lines
      that happen to be in the same set.
   2. Exploit the APIs copy_user_page and clear_user_page better.
   3. Be more selective about I-cache purging; in particular, use
      invalidate_all more sparingly.

   */
| 140 | |
| 141 | /*########################################################################## |
| 142 | SUPPORT FUNCTIONS |
| 143 | ##########################################################################*/ |
| 144 | |
| 145 | /****************************************************************************/ |
/* The following group of functions deals with mapping and unmapping a
   temporary page into the DTLB slot that has been set aside for our
   exclusive use. */
/* In order to accomplish this, we use the generic interface for adding and
   removing a wired slot entry as defined in arch/sh64/mm/tlb.c */
| 150 | /****************************************************************************/ |
| 151 | |
| 152 | static unsigned long slot_own_flags; |
| 153 | |
| 154 | static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr) |
| 155 | { |
| 156 | local_irq_save(slot_own_flags); |
| 157 | sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr); |
| 158 | } |
| 159 | |
| 160 | static inline void sh64_teardown_dtlb_cache_slot(void) |
| 161 | { |
| 162 | sh64_teardown_tlb_slot(dtlb_cache_slot); |
| 163 | local_irq_restore(slot_own_flags); |
| 164 | } |
| 165 | |
| 166 | /****************************************************************************/ |
| 167 | |
| 168 | #ifndef CONFIG_ICACHE_DISABLED |
| 169 | |
| 170 | static void __inline__ sh64_icache_inv_all(void) |
| 171 | { |
| 172 | unsigned long long addr, flag, data; |
| 173 | unsigned int flags; |
| 174 | |
| 175 | addr=ICCR0; |
| 176 | flag=ICCR0_ICI; |
| 177 | data=0; |
| 178 | |
| 179 | /* Make this a critical section for safety (probably not strictly necessary.) */ |
| 180 | local_irq_save(flags); |
| 181 | |
| 182 | /* Without %1 it gets unexplicably wrong */ |
| 183 | asm volatile("getcfg %3, 0, %0\n\t" |
| 184 | "or %0, %2, %0\n\t" |
| 185 | "putcfg %3, 0, %0\n\t" |
| 186 | "synci" |
| 187 | : "=&r" (data) |
| 188 | : "0" (data), "r" (flag), "r" (addr)); |
| 189 | |
| 190 | local_irq_restore(flags); |
| 191 | } |
| 192 | |
| 193 | static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end) |
| 194 | { |
| 195 | /* Invalidate range of addresses [start,end] from the I-cache, where |
| 196 | * the addresses lie in the kernel superpage. */ |
| 197 | |
| 198 | unsigned long long ullend, addr, aligned_start; |
| 199 | #if (NEFF == 32) |
| 200 | aligned_start = (unsigned long long)(signed long long)(signed long) start; |
| 201 | #else |
| 202 | #error "NEFF != 32" |
| 203 | #endif |
| 204 | aligned_start &= L1_CACHE_ALIGN_MASK; |
| 205 | addr = aligned_start; |
| 206 | #if (NEFF == 32) |
| 207 | ullend = (unsigned long long) (signed long long) (signed long) end; |
| 208 | #else |
| 209 | #error "NEFF != 32" |
| 210 | #endif |
| 211 | while (addr <= ullend) { |
| 212 | asm __volatile__ ("icbi %0, 0" : : "r" (addr)); |
| 213 | addr += L1_CACHE_BYTES; |
| 214 | } |
| 215 | } |
| 216 | |
| 217 | static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr) |
| 218 | { |
| 219 | /* If we get called, we know that vma->vm_flags contains VM_EXEC. |
| 220 | Also, eaddr is page-aligned. */ |
| 221 | |
| 222 | unsigned long long addr, end_addr; |
| 223 | unsigned long flags = 0; |
| 224 | unsigned long running_asid, vma_asid; |
| 225 | addr = eaddr; |
| 226 | end_addr = addr + PAGE_SIZE; |
| 227 | |
| 228 | /* Check whether we can use the current ASID for the I-cache |
| 229 | invalidation. For example, if we're called via |
| 230 | access_process_vm->flush_cache_page->here, (e.g. when reading from |
| 231 | /proc), 'running_asid' will be that of the reader, not of the |
| 232 | victim. |
| 233 | |
| 234 | Also, note the risk that we might get pre-empted between the ASID |
| 235 | compare and blocking IRQs, and before we regain control, the |
| 236 | pid->ASID mapping changes. However, the whole cache will get |
| 237 | invalidated when the mapping is renewed, so the worst that can |
| 238 | happen is that the loop below ends up invalidating somebody else's |
| 239 | cache entries. |
| 240 | */ |
| 241 | |
| 242 | running_asid = get_asid(); |
| 243 | vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK); |
| 244 | if (running_asid != vma_asid) { |
| 245 | local_irq_save(flags); |
| 246 | switch_and_save_asid(vma_asid); |
| 247 | } |
| 248 | while (addr < end_addr) { |
| 249 | /* Worth unrolling a little */ |
| 250 | asm __volatile__("icbi %0, 0" : : "r" (addr)); |
| 251 | asm __volatile__("icbi %0, 32" : : "r" (addr)); |
| 252 | asm __volatile__("icbi %0, 64" : : "r" (addr)); |
| 253 | asm __volatile__("icbi %0, 96" : : "r" (addr)); |
| 254 | addr += 128; |
| 255 | } |
| 256 | if (running_asid != vma_asid) { |
| 257 | switch_and_save_asid(running_asid); |
| 258 | local_irq_restore(flags); |
| 259 | } |
| 260 | } |
| 261 | |
| 262 | /****************************************************************************/ |
| 263 | |
| 264 | static void sh64_icache_inv_user_page_range(struct mm_struct *mm, |
| 265 | unsigned long start, unsigned long end) |
| 266 | { |
| 267 | /* Used for invalidating big chunks of I-cache, i.e. assume the range |
| 268 | is whole pages. If 'start' or 'end' is not page aligned, the code |
| 269 | is conservative and invalidates to the ends of the enclosing pages. |
| 270 | This is functionally OK, just a performance loss. */ |
| 271 | |
| 272 | /* See the comments below in sh64_dcache_purge_user_range() regarding |
| 273 | the choice of algorithm. However, for the I-cache option (2) isn't |
| 274 | available because there are no physical tags so aliases can't be |
| 275 | resolved. The icbi instruction has to be used through the user |
| 276 | mapping. Because icbi is cheaper than ocbp on a cache hit, it |
| 277 | would be cheaper to use the selective code for a large range than is |
| 278 | possible with the D-cache. Just assume 64 for now as a working |
| 279 | figure. |
| 280 | */ |
| 281 | |
| 282 | int n_pages; |
| 283 | |
| 284 | if (!mm) return; |
| 285 | |
| 286 | n_pages = ((end - start) >> PAGE_SHIFT); |
| 287 | if (n_pages >= 64) { |
| 288 | sh64_icache_inv_all(); |
| 289 | } else { |
| 290 | unsigned long aligned_start; |
| 291 | unsigned long eaddr; |
| 292 | unsigned long after_last_page_start; |
| 293 | unsigned long mm_asid, current_asid; |
| 294 | unsigned long long flags = 0ULL; |
| 295 | |
| 296 | mm_asid = mm->context & MMU_CONTEXT_ASID_MASK; |
| 297 | current_asid = get_asid(); |
| 298 | |
| 299 | if (mm_asid != current_asid) { |
| 300 | /* Switch ASID and run the invalidate loop under cli */ |
| 301 | local_irq_save(flags); |
| 302 | switch_and_save_asid(mm_asid); |
| 303 | } |
| 304 | |
| 305 | aligned_start = start & PAGE_MASK; |
| 306 | after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK); |
| 307 | |
| 308 | while (aligned_start < after_last_page_start) { |
| 309 | struct vm_area_struct *vma; |
| 310 | unsigned long vma_end; |
| 311 | vma = find_vma(mm, aligned_start); |
| 312 | if (!vma || (aligned_start <= vma->vm_end)) { |
| 313 | /* Avoid getting stuck in an error condition */ |
| 314 | aligned_start += PAGE_SIZE; |
| 315 | continue; |
| 316 | } |
| 317 | vma_end = vma->vm_end; |
| 318 | if (vma->vm_flags & VM_EXEC) { |
| 319 | /* Executable */ |
| 320 | eaddr = aligned_start; |
| 321 | while (eaddr < vma_end) { |
| 322 | sh64_icache_inv_user_page(vma, eaddr); |
| 323 | eaddr += PAGE_SIZE; |
| 324 | } |
| 325 | } |
| 326 | aligned_start = vma->vm_end; /* Skip to start of next region */ |
| 327 | } |
| 328 | if (mm_asid != current_asid) { |
| 329 | switch_and_save_asid(current_asid); |
| 330 | local_irq_restore(flags); |
| 331 | } |
| 332 | } |
| 333 | } |
| 334 | |
| 335 | static void sh64_icache_inv_user_small_range(struct mm_struct *mm, |
| 336 | unsigned long start, int len) |
| 337 | { |
| 338 | |
| 339 | /* Invalidate a small range of user context I-cache, not necessarily |
| 340 | page (or even cache-line) aligned. */ |
| 341 | |
| 342 | unsigned long long eaddr = start; |
| 343 | unsigned long long eaddr_end = start + len; |
| 344 | unsigned long current_asid, mm_asid; |
| 345 | unsigned long long flags; |
| 346 | unsigned long long epage_start; |
| 347 | |
| 348 | /* Since this is used inside ptrace, the ASID in the mm context |
| 349 | typically won't match current_asid. We'll have to switch ASID to do |
| 350 | this. For safety, and given that the range will be small, do all |
| 351 | this under cli. |
| 352 | |
| 353 | Note, there is a hazard that the ASID in mm->context is no longer |
| 354 | actually associated with mm, i.e. if the mm->context has started a |
| 355 | new cycle since mm was last active. However, this is just a |
| 356 | performance issue: all that happens is that we invalidate lines |
| 357 | belonging to another mm, so the owning process has to refill them |
| 358 | when that mm goes live again. mm itself can't have any cache |
| 359 | entries because there will have been a flush_cache_all when the new |
| 360 | mm->context cycle started. */ |
| 361 | |
| 362 | /* Align to start of cache line. Otherwise, suppose len==8 and start |
| 363 | was at 32N+28 : the last 4 bytes wouldn't get invalidated. */ |
| 364 | eaddr = start & L1_CACHE_ALIGN_MASK; |
| 365 | eaddr_end = start + len; |
| 366 | |
| 367 | local_irq_save(flags); |
| 368 | mm_asid = mm->context & MMU_CONTEXT_ASID_MASK; |
| 369 | current_asid = switch_and_save_asid(mm_asid); |
| 370 | |
| 371 | epage_start = eaddr & PAGE_MASK; |
| 372 | |
| 373 | while (eaddr < eaddr_end) |
| 374 | { |
| 375 | asm __volatile__("icbi %0, 0" : : "r" (eaddr)); |
| 376 | eaddr += L1_CACHE_BYTES; |
| 377 | } |
| 378 | switch_and_save_asid(current_asid); |
| 379 | local_irq_restore(flags); |
| 380 | } |
| 381 | |
| 382 | static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end) |
| 383 | { |
| 384 | /* The icbi instruction never raises ITLBMISS. i.e. if there's not a |
| 385 | cache hit on the virtual tag the instruction ends there, without a |
| 386 | TLB lookup. */ |
| 387 | |
| 388 | unsigned long long aligned_start; |
| 389 | unsigned long long ull_end; |
| 390 | unsigned long long addr; |
| 391 | |
| 392 | ull_end = end; |
| 393 | |
| 394 | /* Just invalidate over the range using the natural addresses. TLB |
| 395 | miss handling will be OK (TBC). Since it's for the current process, |
| 396 | either we're already in the right ASID context, or the ASIDs have |
| 397 | been recycled since we were last active in which case we might just |
| 398 | invalidate another processes I-cache entries : no worries, just a |
| 399 | performance drop for him. */ |
| 400 | aligned_start = start & L1_CACHE_ALIGN_MASK; |
| 401 | addr = aligned_start; |
| 402 | while (addr < ull_end) { |
| 403 | asm __volatile__ ("icbi %0, 0" : : "r" (addr)); |
| 404 | asm __volatile__ ("nop"); |
| 405 | asm __volatile__ ("nop"); |
| 406 | addr += L1_CACHE_BYTES; |
| 407 | } |
| 408 | } |
| 409 | |
| 410 | #endif /* !CONFIG_ICACHE_DISABLED */ |
| 411 | |
| 412 | /****************************************************************************/ |
| 413 | |
| 414 | #ifndef CONFIG_DCACHE_DISABLED |
| 415 | |
| 416 | /* Buffer used as the target of alloco instructions to purge data from cache |
| 417 | sets by natural eviction. -- RPC */ |
| 418 | #define DUMMY_ALLOCO_AREA_SIZE L1_CACHE_SIZE_BYTES + (1024 * 4) |
| 419 | static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, }; |
| 420 | |
| 421 | /****************************************************************************/ |
| 422 | |
| 423 | static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets) |
| 424 | { |
| 425 | /* Purge all ways in a particular block of sets, specified by the base |
| 426 | set number and number of sets. Can handle wrap-around, if that's |
| 427 | needed. */ |
| 428 | |
| 429 | int dummy_buffer_base_set; |
| 430 | unsigned long long eaddr, eaddr0, eaddr1; |
| 431 | int j; |
| 432 | int set_offset; |
| 433 | |
| 434 | dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift; |
| 435 | set_offset = sets_to_purge_base - dummy_buffer_base_set; |
| 436 | |
| 437 | for (j=0; j<n_sets; j++, set_offset++) { |
| 438 | set_offset &= (cpu_data->dcache.sets - 1); |
| 439 | eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift); |
| 440 | |
| 441 | /* Do one alloco which hits the required set per cache way. For |
| 442 | write-back mode, this will purge the #ways resident lines. There's |
| 443 | little point unrolling this loop because the allocos stall more if |
| 444 | they're too close together. */ |
| 445 | eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways; |
| 446 | for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) { |
| 447 | asm __volatile__ ("alloco %0, 0" : : "r" (eaddr)); |
| 448 | asm __volatile__ ("synco"); /* TAKum03020 */ |
| 449 | } |
| 450 | |
| 451 | eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways; |
| 452 | for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) { |
| 453 | /* Load from each address. Required because alloco is a NOP if |
| 454 | the cache is write-through. Write-through is a config option. */ |
| 455 | if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags))) |
| 456 | *(volatile unsigned char *)(int)eaddr; |
| 457 | } |
| 458 | } |
| 459 | |
| 460 | /* Don't use OCBI to invalidate the lines. That costs cycles directly. |
| 461 | If the dummy block is just left resident, it will naturally get |
| 462 | evicted as required. */ |
| 463 | |
| 464 | return; |
| 465 | } |
| 466 | |
| 467 | /****************************************************************************/ |
| 468 | |
| 469 | static void sh64_dcache_purge_all(void) |
| 470 | { |
| 471 | /* Purge the entire contents of the dcache. The most efficient way to |
| 472 | achieve this is to use alloco instructions on a region of unused |
| 473 | memory equal in size to the cache, thereby causing the current |
| 474 | contents to be discarded by natural eviction. The alternative, |
| 475 | namely reading every tag, setting up a mapping for the corresponding |
| 476 | page and doing an OCBP for the line, would be much more expensive. |
| 477 | */ |
| 478 | |
| 479 | sh64_dcache_purge_sets(0, cpu_data->dcache.sets); |
| 480 | |
| 481 | return; |
| 482 | |
| 483 | } |
| 484 | |
| 485 | /****************************************************************************/ |
| 486 | |
| 487 | static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end) |
| 488 | { |
| 489 | /* Purge the range of addresses [start,end] from the D-cache. The |
| 490 | addresses lie in the superpage mapping. There's no harm if we |
| 491 | overpurge at either end - just a small performance loss. */ |
| 492 | unsigned long long ullend, addr, aligned_start; |
| 493 | #if (NEFF == 32) |
| 494 | aligned_start = (unsigned long long)(signed long long)(signed long) start; |
| 495 | #else |
| 496 | #error "NEFF != 32" |
| 497 | #endif |
| 498 | aligned_start &= L1_CACHE_ALIGN_MASK; |
| 499 | addr = aligned_start; |
| 500 | #if (NEFF == 32) |
| 501 | ullend = (unsigned long long) (signed long long) (signed long) end; |
| 502 | #else |
| 503 | #error "NEFF != 32" |
| 504 | #endif |
| 505 | while (addr <= ullend) { |
| 506 | asm __volatile__ ("ocbp %0, 0" : : "r" (addr)); |
| 507 | addr += L1_CACHE_BYTES; |
| 508 | } |
| 509 | return; |
| 510 | } |
| 511 | |
| 512 | /* Assumes this address (+ (2**n_synbits) pages up from it) aren't used for |
| 513 | anything else in the kernel */ |
| 514 | #define MAGIC_PAGE0_START 0xffffffffec000000ULL |
| 515 | |
| 516 | static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr) |
| 517 | { |
| 518 | /* Purge the physical page 'paddr' from the cache. It's known that any |
| 519 | cache lines requiring attention have the same page colour as the the |
| 520 | address 'eaddr'. |
| 521 | |
| 522 | This relies on the fact that the D-cache matches on physical tags |
| 523 | when no virtual tag matches. So we create an alias for the original |
| 524 | page and purge through that. (Alternatively, we could have done |
| 525 | this by switching ASID to match the original mapping and purged |
| 526 | through that, but that involves ASID switching cost + probably a |
| 527 | TLBMISS + refill anyway.) |
| 528 | */ |
| 529 | |
| 530 | unsigned long long magic_page_start; |
| 531 | unsigned long long magic_eaddr, magic_eaddr_end; |
| 532 | |
| 533 | magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK); |
| 534 | |
| 535 | /* As long as the kernel is not pre-emptible, this doesn't need to be |
| 536 | under cli/sti. */ |
| 537 | |
| 538 | sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr); |
| 539 | |
| 540 | magic_eaddr = magic_page_start; |
| 541 | magic_eaddr_end = magic_eaddr + PAGE_SIZE; |
| 542 | while (magic_eaddr < magic_eaddr_end) { |
| 543 | /* Little point in unrolling this loop - the OCBPs are blocking |
| 544 | and won't go any quicker (i.e. the loop overhead is parallel |
| 545 | to part of the OCBP execution.) */ |
| 546 | asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr)); |
| 547 | magic_eaddr += L1_CACHE_BYTES; |
| 548 | } |
| 549 | |
| 550 | sh64_teardown_dtlb_cache_slot(); |
| 551 | } |
| 552 | |
| 553 | /****************************************************************************/ |
| 554 | |
| 555 | static void sh64_dcache_purge_phy_page(unsigned long paddr) |
| 556 | { |
| 557 | /* Pure a page given its physical start address, by creating a |
| 558 | temporary 1 page mapping and purging across that. Even if we know |
| 559 | the virtual address (& vma or mm) of the page, the method here is |
| 560 | more elegant because it avoids issues of coping with page faults on |
| 561 | the purge instructions (i.e. no special-case code required in the |
| 562 | critical path in the TLB miss handling). */ |
| 563 | |
| 564 | unsigned long long eaddr_start, eaddr, eaddr_end; |
| 565 | int i; |
| 566 | |
| 567 | /* As long as the kernel is not pre-emptible, this doesn't need to be |
| 568 | under cli/sti. */ |
| 569 | |
| 570 | eaddr_start = MAGIC_PAGE0_START; |
| 571 | for (i=0; i < (1 << CACHE_OC_N_SYNBITS); i++) { |
| 572 | sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr); |
| 573 | |
| 574 | eaddr = eaddr_start; |
| 575 | eaddr_end = eaddr + PAGE_SIZE; |
| 576 | while (eaddr < eaddr_end) { |
| 577 | asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr)); |
| 578 | eaddr += L1_CACHE_BYTES; |
| 579 | } |
| 580 | |
| 581 | sh64_teardown_dtlb_cache_slot(); |
| 582 | eaddr_start += PAGE_SIZE; |
| 583 | } |
| 584 | } |
| 585 | |
Hugh Dickins | 60ec558 | 2005-10-29 18:16:34 -0700 | [diff] [blame] | 586 | static void sh64_dcache_purge_user_pages(struct mm_struct *mm, |
| 587 | unsigned long addr, unsigned long end) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 588 | { |
| 589 | pgd_t *pgd; |
| 590 | pmd_t *pmd; |
| 591 | pte_t *pte; |
| 592 | pte_t entry; |
Hugh Dickins | 60ec558 | 2005-10-29 18:16:34 -0700 | [diff] [blame] | 593 | spinlock_t *ptl; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 594 | unsigned long paddr; |
| 595 | |
Hugh Dickins | 60ec558 | 2005-10-29 18:16:34 -0700 | [diff] [blame] | 596 | if (!mm) |
| 597 | return; /* No way to find physical address of page */ |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 598 | |
Hugh Dickins | 60ec558 | 2005-10-29 18:16:34 -0700 | [diff] [blame] | 599 | pgd = pgd_offset(mm, addr); |
| 600 | if (pgd_bad(*pgd)) |
| 601 | return; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 602 | |
Hugh Dickins | 60ec558 | 2005-10-29 18:16:34 -0700 | [diff] [blame] | 603 | pmd = pmd_offset(pgd, addr); |
| 604 | if (pmd_none(*pmd) || pmd_bad(*pmd)) |
| 605 | return; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 606 | |
Hugh Dickins | 60ec558 | 2005-10-29 18:16:34 -0700 | [diff] [blame] | 607 | pte = pte_offset_map_lock(mm, pmd, addr, &ptl); |
| 608 | do { |
| 609 | entry = *pte; |
| 610 | if (pte_none(entry) || !pte_present(entry)) |
| 611 | continue; |
| 612 | paddr = pte_val(entry) & PAGE_MASK; |
| 613 | sh64_dcache_purge_coloured_phy_page(paddr, addr); |
| 614 | } while (pte++, addr += PAGE_SIZE, addr != end); |
| 615 | pte_unmap_unlock(pte - 1, ptl); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 616 | } |
| 617 | /****************************************************************************/ |
| 618 | |
| 619 | static void sh64_dcache_purge_user_range(struct mm_struct *mm, |
| 620 | unsigned long start, unsigned long end) |
| 621 | { |
| 622 | /* There are at least 5 choices for the implementation of this, with |
| 623 | pros (+), cons(-), comments(*): |
| 624 | |
| 625 | 1. ocbp each line in the range through the original user's ASID |
| 626 | + no lines spuriously evicted |
| 627 | - tlbmiss handling (must either handle faults on demand => extra |
| 628 | special-case code in tlbmiss critical path), or map the page in |
| 629 | advance (=> flush_tlb_range in advance to avoid multiple hits) |
| 630 | - ASID switching |
| 631 | - expensive for large ranges |
| 632 | |
| 633 | 2. temporarily map each page in the range to a special effective |
| 634 | address and ocbp through the temporary mapping; relies on the |
| 635 | fact that SH-5 OCB* always do TLB lookup and match on ptags (they |
| 636 | never look at the etags) |
| 637 | + no spurious evictions |
| 638 | - expensive for large ranges |
| 639 | * surely cheaper than (1) |
| 640 | |
| 641 | 3. walk all the lines in the cache, check the tags, if a match |
| 642 | occurs create a page mapping to ocbp the line through |
| 643 | + no spurious evictions |
| 644 | - tag inspection overhead |
| 645 | - (especially for small ranges) |
| 646 | - potential cost of setting up/tearing down page mapping for |
| 647 | every line that matches the range |
| 648 | * cost partly independent of range size |
| 649 | |
| 650 | 4. walk all the lines in the cache, check the tags, if a match |
| 651 | occurs use 4 * alloco to purge the line (+3 other probably |
| 652 | innocent victims) by natural eviction |
| 653 | + no tlb mapping overheads |
| 654 | - spurious evictions |
| 655 | - tag inspection overhead |
| 656 | |
| 657 | 5. implement like flush_cache_all |
| 658 | + no tag inspection overhead |
| 659 | - spurious evictions |
| 660 | - bad for small ranges |
| 661 | |
| 662 | (1) can be ruled out as more expensive than (2). (2) appears best |
| 663 | for small ranges. The choice between (3), (4) and (5) for large |
| 664 | ranges and the range size for the large/small boundary need |
| 665 | benchmarking to determine. |
| 666 | |
| 667 | For now use approach (2) for small ranges and (5) for large ones. |
| 668 | |
| 669 | */ |
| 670 | |
| 671 | int n_pages; |
| 672 | |
| 673 | n_pages = ((end - start) >> PAGE_SHIFT); |
Hugh Dickins | 60ec558 | 2005-10-29 18:16:34 -0700 | [diff] [blame] | 674 | if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 675 | #if 1 |
| 676 | sh64_dcache_purge_all(); |
| 677 | #else |
| 678 | unsigned long long set, way; |
| 679 | unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK; |
| 680 | for (set = 0; set < cpu_data->dcache.sets; set++) { |
| 681 | unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift); |
| 682 | for (way = 0; way < cpu_data->dcache.ways; way++) { |
| 683 | unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift); |
| 684 | unsigned long long tag0; |
| 685 | unsigned long line_valid; |
| 686 | |
| 687 | asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr)); |
| 688 | line_valid = tag0 & SH_CACHE_VALID; |
| 689 | if (line_valid) { |
| 690 | unsigned long cache_asid; |
| 691 | unsigned long epn; |
| 692 | |
| 693 | cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift; |
| 694 | /* The next line needs some |
| 695 | explanation. The virtual tags |
| 696 | encode bits [31:13] of the virtual |
| 697 | address, bit [12] of the 'tag' being |
| 698 | implied by the cache set index. */ |
| 699 | epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift); |
| 700 | |
| 701 | if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) { |
| 702 | /* TODO : could optimise this |
| 703 | call by batching multiple |
| 704 | adjacent sets together. */ |
| 705 | sh64_dcache_purge_sets(set, 1); |
| 706 | break; /* Don't waste time inspecting other ways for this set */ |
| 707 | } |
| 708 | } |
| 709 | } |
| 710 | } |
| 711 | #endif |
| 712 | } else { |
Hugh Dickins | 60ec558 | 2005-10-29 18:16:34 -0700 | [diff] [blame] | 713 | /* Small range, covered by a single page table page */ |
| 714 | start &= PAGE_MASK; /* should already be so */ |
| 715 | end = PAGE_ALIGN(end); /* should already be so */ |
| 716 | sh64_dcache_purge_user_pages(mm, start, end); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 717 | } |
| 718 | return; |
| 719 | } |
| 720 | |
| 721 | static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end) |
| 722 | { |
| 723 | unsigned long long aligned_start; |
| 724 | unsigned long long ull_end; |
| 725 | unsigned long long addr; |
| 726 | |
| 727 | ull_end = end; |
| 728 | |
| 729 | /* Just wback over the range using the natural addresses. TLB miss |
| 730 | handling will be OK (TBC) : the range has just been written to by |
| 731 | the signal frame setup code, so the PTEs must exist. |
| 732 | |
| 733 | Note, if we have CONFIG_PREEMPT and get preempted inside this loop, |
| 734 | it doesn't matter, even if the pid->ASID mapping changes whilst |
| 735 | we're away. In that case the cache will have been flushed when the |
| 736 | mapping was renewed. So the writebacks below will be nugatory (and |
| 737 | we'll doubtless have to fault the TLB entry/ies in again with the |
| 738 | new ASID), but it's a rare case. |
| 739 | */ |
| 740 | aligned_start = start & L1_CACHE_ALIGN_MASK; |
| 741 | addr = aligned_start; |
| 742 | while (addr < ull_end) { |
| 743 | asm __volatile__ ("ocbwb %0, 0" : : "r" (addr)); |
| 744 | addr += L1_CACHE_BYTES; |
| 745 | } |
| 746 | } |
| 747 | |
| 748 | /****************************************************************************/ |
| 749 | |
| 750 | /* These *MUST* lie in an area of virtual address space that's otherwise unused. */ |
| 751 | #define UNIQUE_EADDR_START 0xe0000000UL |
| 752 | #define UNIQUE_EADDR_END 0xe8000000UL |
| 753 | |
| 754 | static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr) |
| 755 | { |
| 756 | /* Given a physical address paddr, and a user virtual address |
| 757 | user_eaddr which will eventually be mapped to it, create a one-off |
| 758 | kernel-private eaddr mapped to the same paddr. This is used for |
| 759 | creating special destination pages for copy_user_page and |
| 760 | clear_user_page */ |
| 761 | |
| 762 | static unsigned long current_pointer = UNIQUE_EADDR_START; |
| 763 | unsigned long coloured_pointer; |
| 764 | |
| 765 | if (current_pointer == UNIQUE_EADDR_END) { |
| 766 | sh64_dcache_purge_all(); |
| 767 | current_pointer = UNIQUE_EADDR_START; |
| 768 | } |
| 769 | |
| 770 | coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK); |
| 771 | sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr); |
| 772 | |
| 773 | current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS); |
| 774 | |
| 775 | return coloured_pointer; |
| 776 | } |
| 777 | |
| 778 | /****************************************************************************/ |
| 779 | |
/*
 * Copy the page at 'from' into the page at 'to', writing the destination
 * through a temporary mapping that has the same cache colour as the user
 * address 'address'.
 */
static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address)
{
	/* Get rid of any cache entries for the destination page that have the
	   wrong colour.  They turn up quite often: the kernel used the page
	   internally not long ago, released it, and now it is being handed to
	   the user. */
	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

	/* Copy via a one-off alias of the destination with the right colour. */
	sh64_page_copy(from, (void *) sh64_make_unique_eaddr(address, __pa(to)));

	sh64_teardown_dtlb_cache_slot();
}
| 795 | |
/*
 * Clear the page at 'to', writing it through a temporary mapping that has
 * the same cache colour as the user address 'address'.
 */
static void sh64_clear_user_page_coloured(void *to, unsigned long address)
{
	/* Drop any kernel-originated lines of the wrong colour for this page
	   (same reasoning as in sh64_copy_user_page_coloured). */
	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

	/* Clear via a one-off alias of the page with the right colour. */
	sh64_page_clear((void *) sh64_make_unique_eaddr(address, __pa(to)));

	sh64_teardown_dtlb_cache_slot();
}
| 809 | |
| 810 | #endif /* !CONFIG_DCACHE_DISABLED */ |
| 811 | |
| 812 | /****************************************************************************/ |
| 813 | |
| 814 | /*########################################################################## |
| 815 | EXTERNALLY CALLABLE API. |
| 816 | ##########################################################################*/ |
| 817 | |
| 818 | /* These functions are described in Documentation/cachetlb.txt. |
| 819 | Each one of these functions varies in behaviour depending on whether the |
| 820 | I-cache and/or D-cache are configured out. |
| 821 | |
| 822 | Note that the Linux term 'flush' corresponds to what is termed 'purge' in |
| 823 | the sh/sh64 jargon for the D-cache, i.e. write back dirty data then |
| 824 | invalidate the cache lines, and 'invalidate' for the I-cache. |
| 825 | */ |
| 826 | |
| 827 | #undef FLUSH_TRACE |
| 828 | |
/*
 * Flush both caches completely: dirty D-cache data is written back to
 * memory and the D-cache contents discarded, then the whole I-cache is
 * invalidated.
 */
void flush_cache_all(void)
{
	sh64_dcache_purge_all();
	sh64_icache_inv_all();
}
| 836 | |
| 837 | /****************************************************************************/ |
| 838 | |
/*
 * Flush an entire user address space from the caches, writing back dirty
 * data first (e.g. for shared mmap etc).
 *
 * A selective version is possible: inspect all the tags and do 4*alloco on
 * every set holding a match, as for flush_cache_range.  It has not been
 * done because the callers (fork/exit/execve) are expensive anyway.
 */
void flush_cache_mm(struct mm_struct *mm)
{
	/* The D-cache purge cannot be skipped, whatever is said about the
	   I-cache below.  Odd-coloured dirty data belonging to the mm may
	   still be cached; if natural eviction wrote it out after the kernel
	   had reused the page, there would be chaos. */
	sh64_dcache_purge_all();

	/* Nothing to do for the I-cache.  The mm being torn down will never be
	   active again, so I-cache lines tagged with its ASID stay invisible
	   for the rest of this ASID cycle, and a flush_cache_all happens
	   before the ASID is reused.  This mirrors the lack of action needed
	   in flush_tlb_mm - see fault.c. */
}
| 864 | |
| 865 | /****************************************************************************/ |
| 866 | |
| 867 | void flush_cache_range(struct vm_area_struct *vma, unsigned long start, |
| 868 | unsigned long end) |
| 869 | { |
| 870 | struct mm_struct *mm = vma->vm_mm; |
| 871 | |
| 872 | /* Invalidate (from both caches) the range [start,end) of virtual |
| 873 | addresses from the user address space specified by mm, after writing |
| 874 | back any dirty data. |
| 875 | |
Hugh Dickins | 60ec558 | 2005-10-29 18:16:34 -0700 | [diff] [blame] | 876 | Note, 'end' is 1 byte beyond the end of the range to flush. */ |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 877 | |
| 878 | sh64_dcache_purge_user_range(mm, start, end); |
| 879 | sh64_icache_inv_user_page_range(mm, start, end); |
| 880 | } |
| 881 | |
| 882 | /****************************************************************************/ |
| 883 | |
| 884 | void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned long pfn) |
| 885 | { |
| 886 | /* Invalidate any entries in either cache for the vma within the user |
| 887 | address space vma->vm_mm for the page starting at virtual address |
| 888 | 'eaddr'. This seems to be used primarily in breaking COW. Note, |
| 889 | the I-cache must be searched too in case the page in question is |
| 890 | both writable and being executed from (e.g. stack trampolines.) |
| 891 | |
Hugh Dickins | 60ec558 | 2005-10-29 18:16:34 -0700 | [diff] [blame] | 892 | Note, this is called with pte lock held. |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 893 | */ |
| 894 | |
| 895 | sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT); |
| 896 | |
| 897 | if (vma->vm_flags & VM_EXEC) { |
| 898 | sh64_icache_inv_user_page(vma, eaddr); |
| 899 | } |
| 900 | } |
| 901 | |
| 902 | /****************************************************************************/ |
| 903 | |
| 904 | #ifndef CONFIG_DCACHE_DISABLED |
| 905 | |
| 906 | void copy_user_page(void *to, void *from, unsigned long address, struct page *page) |
| 907 | { |
| 908 | /* 'from' and 'to' are kernel virtual addresses (within the superpage |
| 909 | mapping of the physical RAM). 'address' is the user virtual address |
| 910 | where the copy 'to' will be mapped after. This allows a custom |
| 911 | mapping to be used to ensure that the new copy is placed in the |
| 912 | right cache sets for the user to see it without having to bounce it |
| 913 | out via memory. Note however : the call to flush_page_to_ram in |
| 914 | (generic)/mm/memory.c:(break_cow) undoes all this good work in that one |
| 915 | very important case! |
| 916 | |
| 917 | TBD : can we guarantee that on every call, any cache entries for |
| 918 | 'from' are in the same colour sets as 'address' also? i.e. is this |
| 919 | always used just to deal with COW? (I suspect not). */ |
| 920 | |
| 921 | /* There are two possibilities here for when the page 'from' was last accessed: |
| 922 | * by the kernel : this is OK, no purge required. |
| 923 | * by the/a user (e.g. for break_COW) : need to purge. |
| 924 | |
| 925 | If the potential user mapping at 'address' is the same colour as |
| 926 | 'from' there is no need to purge any cache lines from the 'from' |
| 927 | page mapped into cache sets of colour 'address'. (The copy will be |
| 928 | accessing the page through 'from'). |
| 929 | */ |
| 930 | |
| 931 | if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) { |
| 932 | sh64_dcache_purge_coloured_phy_page(__pa(from), address); |
| 933 | } |
| 934 | |
| 935 | if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) { |
| 936 | /* No synonym problem on destination */ |
| 937 | sh64_page_copy(from, to); |
| 938 | } else { |
| 939 | sh64_copy_user_page_coloured(to, from, address); |
| 940 | } |
| 941 | |
| 942 | /* Note, don't need to flush 'from' page from the cache again - it's |
| 943 | done anyway by the generic code */ |
| 944 | } |
| 945 | |
| 946 | void clear_user_page(void *to, unsigned long address, struct page *page) |
| 947 | { |
| 948 | /* 'to' is a kernel virtual address (within the superpage |
| 949 | mapping of the physical RAM). 'address' is the user virtual address |
| 950 | where the 'to' page will be mapped after. This allows a custom |
| 951 | mapping to be used to ensure that the new copy is placed in the |
| 952 | right cache sets for the user to see it without having to bounce it |
| 953 | out via memory. |
| 954 | */ |
| 955 | |
| 956 | if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) { |
| 957 | /* No synonym problem on destination */ |
| 958 | sh64_page_clear(to); |
| 959 | } else { |
| 960 | sh64_clear_user_page_coloured(to, address); |
| 961 | } |
| 962 | } |
| 963 | |
| 964 | #endif /* !CONFIG_DCACHE_DISABLED */ |
| 965 | |
| 966 | /****************************************************************************/ |
| 967 | |
/*
 * Purge the physical page backing 'page' from the D-cache, then issue a
 * write barrier.
 */
void flush_dcache_page(struct page *page)
{
	sh64_dcache_purge_phy_page(page_to_phys(page));
	wmb();
}
| 973 | |
| 974 | /****************************************************************************/ |
| 975 | |
/*
 * Flush the range of kernel virtual address space delimited by 'start' and
 * 'end' from the I-cache.  Primarily used to make the I-cache coherent
 * after a module has been loaded.
 *
 * The same range has to be purged from the D-cache too, because the SH-5
 * has no snooping between the two caches and flush_page_to_ram() won't be
 * doing it for us.  The addresses are visible through the superpage
 * mapping, so it is guaranteed that there are no cache entries for the
 * range in cache sets of the wrong colour.
 */
void flush_icache_range(unsigned long start, unsigned long end)
{
	sh64_dcache_purge_kernel_range(start, end);
	wmb();
	sh64_icache_inv_kernel_range(start, end);
}
| 995 | |
| 996 | /****************************************************************************/ |
| 997 | |
| 998 | void flush_icache_user_range(struct vm_area_struct *vma, |
| 999 | struct page *page, unsigned long addr, int len) |
| 1000 | { |
| 1001 | /* Flush the range of user (defined by vma->vm_mm) address space |
| 1002 | starting at 'addr' for 'len' bytes from the cache. The range does |
| 1003 | not straddle a page boundary, the unique physical page containing |
| 1004 | the range is 'page'. This seems to be used mainly for invalidating |
| 1005 | an address range following a poke into the program text through the |
| 1006 | ptrace() call from another process (e.g. for BRK instruction |
| 1007 | insertion). */ |
| 1008 | |
| 1009 | sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr); |
| 1010 | mb(); |
| 1011 | |
| 1012 | if (vma->vm_flags & VM_EXEC) { |
| 1013 | sh64_icache_inv_user_small_range(vma->vm_mm, addr, len); |
| 1014 | } |
| 1015 | } |
| 1016 | |
| 1017 | /*########################################################################## |
| 1018 | ARCH/SH64 PRIVATE CALLABLE API. |
| 1019 | ##########################################################################*/ |
| 1020 | |
/*
 * Make a signal trampoline on the stack executable for the current
 * process: write the address range [start, end) back from the D-cache,
 * then invalidate the matching region of the I-cache.
 */
void flush_cache_sigtramp(unsigned long start, unsigned long end)
{
	sh64_dcache_wback_current_user_range(start, end);
	wmb();
	sh64_icache_inv_current_user_range(start, end);
}
| 1032 | |