Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. |
| 3 | * |
| 4 | * This program is free software; you can redistribute it and/or |
| 5 | * modify it under the terms of the GNU General Public License |
| 6 | * as published by the Free Software Foundation, version 2. |
| 7 | * |
| 8 | * This program is distributed in the hope that it will be useful, but |
| 9 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or |
| 11 | * NON INFRINGEMENT. See the GNU General Public License for |
| 12 | * more details. |
| 13 | */ |
| 14 | |
| 15 | #include <linux/string.h> |
| 16 | #include <linux/smp.h> |
| 17 | #include <linux/module.h> |
| 18 | #include <linux/uaccess.h> |
| 19 | #include <asm/fixmap.h> |
| 20 | #include <asm/kmap_types.h> |
| 21 | #include <asm/tlbflush.h> |
| 22 | #include <hv/hypervisor.h> |
| 23 | #include <arch/chip.h> |
| 24 | |
| 25 | |
| 26 | #if !CHIP_HAS_COHERENT_LOCAL_CACHE() |
| 27 | |
| 28 | /* Defined in memcpy.S */ |
| 29 | extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n); |
| 30 | extern unsigned long __copy_to_user_inatomic_asm( |
| 31 | void __user *to, const void *from, unsigned long n); |
| 32 | extern unsigned long __copy_from_user_inatomic_asm( |
| 33 | void *to, const void __user *from, unsigned long n); |
| 34 | extern unsigned long __copy_from_user_zeroing_asm( |
| 35 | void *to, const void __user *from, unsigned long n); |
| 36 | |
| 37 | typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long); |
| 38 | |
/* Copies of at least this many bytes are candidates for the TLB tricks. */
#define LARGE_COPY_CUTOFF 2048

/*
 * Tell the simulator whether (b != 0) or not (b == 0) we are deliberately
 * caching the same memory in more than one way.
 */
#define sim_allow_multiple_caching(b) \
	__insn_mtspr(SPR_SIM_CONTROL, \
		     SIM_CONTROL_ALLOW_MULTIPLE_CACHING | \
		     ((b) << _SIM_CONTROL_OPERATOR_BITS))
| 46 | |
| 47 | /* |
| 48 | * Copy memory by briefly enabling incoherent cacheline-at-a-time mode. |
| 49 | * |
| 50 | * We set up our own source and destination PTEs that we fully control. |
| 51 | * This is the only way to guarantee that we don't race with another |
| 52 | * thread that is modifying the PTE; we can't afford to try the |
| 53 | * copy_{to,from}_user() technique of catching the interrupt, since |
| 54 | * we must run with interrupts disabled to avoid the risk of some |
| 55 | * other code seeing the incoherent data in our cache. (Recall that |
| 56 | * our cache is indexed by PA, so even if the other code doesn't use |
Chris Metcalf | 38a6f42 | 2010-11-01 15:21:35 -0400 | [diff] [blame] | 57 | * our kmap_atomic virtual addresses, they'll still hit in cache using |
Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 58 | * the normal VAs that aren't supposed to hit in cache.) |
| 59 | */ |
| 60 | static void memcpy_multicache(void *dest, const void *source, |
| 61 | pte_t dst_pte, pte_t src_pte, int len) |
| 62 | { |
Chris Metcalf | 0707ad3 | 2010-06-25 17:04:17 -0400 | [diff] [blame] | 63 | int idx; |
| 64 | unsigned long flags, newsrc, newdst; |
Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 65 | pmd_t *pmdp; |
| 66 | pte_t *ptep; |
Chris Metcalf | 38a6f42 | 2010-11-01 15:21:35 -0400 | [diff] [blame] | 67 | int type0, type1; |
Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 68 | int cpu = get_cpu(); |
| 69 | |
| 70 | /* |
| 71 | * Disable interrupts so that we don't recurse into memcpy() |
| 72 | * in an interrupt handler, nor accidentally reference |
| 73 | * the PA of the source from an interrupt routine. Also |
| 74 | * notify the simulator that we're playing games so we don't |
| 75 | * generate spurious coherency warnings. |
| 76 | */ |
| 77 | local_irq_save(flags); |
| 78 | sim_allow_multiple_caching(1); |
| 79 | |
| 80 | /* Set up the new dest mapping */ |
Chris Metcalf | 38a6f42 | 2010-11-01 15:21:35 -0400 | [diff] [blame] | 81 | type0 = kmap_atomic_idx_push(); |
| 82 | idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0; |
Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 83 | newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1)); |
| 84 | pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst); |
| 85 | ptep = pte_offset_kernel(pmdp, newdst); |
| 86 | if (pte_val(*ptep) != pte_val(dst_pte)) { |
| 87 | set_pte(ptep, dst_pte); |
| 88 | local_flush_tlb_page(NULL, newdst, PAGE_SIZE); |
| 89 | } |
| 90 | |
| 91 | /* Set up the new source mapping */ |
Chris Metcalf | 38a6f42 | 2010-11-01 15:21:35 -0400 | [diff] [blame] | 92 | type1 = kmap_atomic_idx_push(); |
| 93 | idx += (type0 - type1); |
Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 94 | src_pte = hv_pte_set_nc(src_pte); |
| 95 | src_pte = hv_pte_clear_writable(src_pte); /* be paranoid */ |
| 96 | newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1)); |
| 97 | pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc); |
| 98 | ptep = pte_offset_kernel(pmdp, newsrc); |
| 99 | *ptep = src_pte; /* set_pte() would be confused by this */ |
| 100 | local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); |
| 101 | |
| 102 | /* Actually move the data. */ |
| 103 | __memcpy_asm((void *)newdst, (const void *)newsrc, len); |
| 104 | |
| 105 | /* |
| 106 | * Remap the source as locally-cached and not OLOC'ed so that |
| 107 | * we can inval without also invaling the remote cpu's cache. |
| 108 | * This also avoids known errata with inv'ing cacheable oloc data. |
| 109 | */ |
| 110 | src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3); |
| 111 | src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */ |
| 112 | *ptep = src_pte; /* set_pte() would be confused by this */ |
| 113 | local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); |
| 114 | |
| 115 | /* |
| 116 | * Do the actual invalidation, covering the full L2 cache line |
| 117 | * at the end since __memcpy_asm() is somewhat aggressive. |
| 118 | */ |
| 119 | __inv_buffer((void *)newsrc, len); |
| 120 | |
| 121 | /* |
| 122 | * We're done: notify the simulator that all is back to normal, |
| 123 | * and re-enable interrupts and pre-emption. |
| 124 | */ |
Chris Metcalf | 38a6f42 | 2010-11-01 15:21:35 -0400 | [diff] [blame] | 125 | kmap_atomic_idx_pop(); |
| 126 | kmap_atomic_idx_pop(); |
Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 127 | sim_allow_multiple_caching(0); |
| 128 | local_irq_restore(flags); |
Chris Metcalf | 0707ad3 | 2010-06-25 17:04:17 -0400 | [diff] [blame] | 129 | put_cpu(); |
Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 130 | } |
| 131 | |
| 132 | /* |
| 133 | * Identify large copies from remotely-cached memory, and copy them |
| 134 | * via memcpy_multicache() if they look good, otherwise fall back |
| 135 | * to the particular kind of copying passed as the memcpy_t function. |
| 136 | */ |
| 137 | static unsigned long fast_copy(void *dest, const void *source, int len, |
| 138 | memcpy_t func) |
| 139 | { |
| 140 | /* |
| 141 | * Check if it's big enough to bother with. We may end up doing a |
| 142 | * small copy via TLB manipulation if we're near a page boundary, |
| 143 | * but presumably we'll make it up when we hit the second page. |
| 144 | */ |
| 145 | while (len >= LARGE_COPY_CUTOFF) { |
| 146 | int copy_size, bytes_left_on_page; |
| 147 | pte_t *src_ptep, *dst_ptep; |
| 148 | pte_t src_pte, dst_pte; |
| 149 | struct page *src_page, *dst_page; |
| 150 | |
| 151 | /* Is the source page oloc'ed to a remote cpu? */ |
| 152 | retry_source: |
| 153 | src_ptep = virt_to_pte(current->mm, (unsigned long)source); |
| 154 | if (src_ptep == NULL) |
| 155 | break; |
| 156 | src_pte = *src_ptep; |
| 157 | if (!hv_pte_get_present(src_pte) || |
| 158 | !hv_pte_get_readable(src_pte) || |
| 159 | hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3) |
| 160 | break; |
| 161 | if (get_remote_cache_cpu(src_pte) == smp_processor_id()) |
| 162 | break; |
| 163 | src_page = pfn_to_page(hv_pte_get_pfn(src_pte)); |
| 164 | get_page(src_page); |
| 165 | if (pte_val(src_pte) != pte_val(*src_ptep)) { |
| 166 | put_page(src_page); |
| 167 | goto retry_source; |
| 168 | } |
| 169 | if (pte_huge(src_pte)) { |
| 170 | /* Adjust the PTE to correspond to a small page */ |
| 171 | int pfn = hv_pte_get_pfn(src_pte); |
| 172 | pfn += (((unsigned long)source & (HPAGE_SIZE-1)) |
| 173 | >> PAGE_SHIFT); |
| 174 | src_pte = pfn_pte(pfn, src_pte); |
| 175 | src_pte = pte_mksmall(src_pte); |
| 176 | } |
| 177 | |
| 178 | /* Is the destination page writable? */ |
| 179 | retry_dest: |
| 180 | dst_ptep = virt_to_pte(current->mm, (unsigned long)dest); |
| 181 | if (dst_ptep == NULL) { |
| 182 | put_page(src_page); |
| 183 | break; |
| 184 | } |
| 185 | dst_pte = *dst_ptep; |
| 186 | if (!hv_pte_get_present(dst_pte) || |
| 187 | !hv_pte_get_writable(dst_pte)) { |
| 188 | put_page(src_page); |
| 189 | break; |
| 190 | } |
| 191 | dst_page = pfn_to_page(hv_pte_get_pfn(dst_pte)); |
| 192 | if (dst_page == src_page) { |
| 193 | /* |
| 194 | * Source and dest are on the same page; this |
| 195 | * potentially exposes us to incoherence if any |
| 196 | * part of src and dest overlap on a cache line. |
| 197 | * Just give up rather than trying to be precise. |
| 198 | */ |
| 199 | put_page(src_page); |
| 200 | break; |
| 201 | } |
| 202 | get_page(dst_page); |
| 203 | if (pte_val(dst_pte) != pte_val(*dst_ptep)) { |
| 204 | put_page(dst_page); |
| 205 | goto retry_dest; |
| 206 | } |
| 207 | if (pte_huge(dst_pte)) { |
| 208 | /* Adjust the PTE to correspond to a small page */ |
| 209 | int pfn = hv_pte_get_pfn(dst_pte); |
| 210 | pfn += (((unsigned long)dest & (HPAGE_SIZE-1)) |
| 211 | >> PAGE_SHIFT); |
| 212 | dst_pte = pfn_pte(pfn, dst_pte); |
| 213 | dst_pte = pte_mksmall(dst_pte); |
| 214 | } |
| 215 | |
| 216 | /* All looks good: create a cachable PTE and copy from it */ |
| 217 | copy_size = len; |
| 218 | bytes_left_on_page = |
| 219 | PAGE_SIZE - (((int)source) & (PAGE_SIZE-1)); |
| 220 | if (copy_size > bytes_left_on_page) |
| 221 | copy_size = bytes_left_on_page; |
| 222 | bytes_left_on_page = |
| 223 | PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1)); |
| 224 | if (copy_size > bytes_left_on_page) |
| 225 | copy_size = bytes_left_on_page; |
| 226 | memcpy_multicache(dest, source, dst_pte, src_pte, copy_size); |
| 227 | |
| 228 | /* Release the pages */ |
| 229 | put_page(dst_page); |
| 230 | put_page(src_page); |
| 231 | |
| 232 | /* Continue on the next page */ |
| 233 | dest += copy_size; |
| 234 | source += copy_size; |
| 235 | len -= copy_size; |
| 236 | } |
| 237 | |
| 238 | return func(dest, source, len); |
| 239 | } |
| 240 | |
| 241 | void *memcpy(void *to, const void *from, __kernel_size_t n) |
| 242 | { |
| 243 | if (n < LARGE_COPY_CUTOFF) |
| 244 | return (void *)__memcpy_asm(to, from, n); |
| 245 | else |
| 246 | return (void *)fast_copy(to, from, n, __memcpy_asm); |
| 247 | } |
| 248 | |
| 249 | unsigned long __copy_to_user_inatomic(void __user *to, const void *from, |
| 250 | unsigned long n) |
| 251 | { |
| 252 | if (n < LARGE_COPY_CUTOFF) |
| 253 | return __copy_to_user_inatomic_asm(to, from, n); |
| 254 | else |
| 255 | return fast_copy(to, from, n, __copy_to_user_inatomic_asm); |
| 256 | } |
| 257 | |
| 258 | unsigned long __copy_from_user_inatomic(void *to, const void __user *from, |
| 259 | unsigned long n) |
| 260 | { |
| 261 | if (n < LARGE_COPY_CUTOFF) |
| 262 | return __copy_from_user_inatomic_asm(to, from, n); |
| 263 | else |
| 264 | return fast_copy(to, from, n, __copy_from_user_inatomic_asm); |
| 265 | } |
| 266 | |
| 267 | unsigned long __copy_from_user_zeroing(void *to, const void __user *from, |
| 268 | unsigned long n) |
| 269 | { |
| 270 | if (n < LARGE_COPY_CUTOFF) |
| 271 | return __copy_from_user_zeroing_asm(to, from, n); |
| 272 | else |
| 273 | return fast_copy(to, from, n, __copy_from_user_zeroing_asm); |
| 274 | } |
| 275 | |
| 276 | #endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */ |