Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
| 2 | ** Tablewalk MMU emulator |
| 3 | ** |
| 4 | ** by Toshiyasu Morita |
| 5 | ** |
| 6 | ** Started 1/16/98 @ 2:22 am |
| 7 | */ |
| 8 | |
| 9 | #include <linux/mman.h> |
| 10 | #include <linux/mm.h> |
| 11 | #include <linux/kernel.h> |
| 12 | #include <linux/ptrace.h> |
| 13 | #include <linux/delay.h> |
| 14 | #include <linux/bootmem.h> |
| 15 | #include <linux/bitops.h> |
| 16 | #include <linux/module.h> |
| 17 | |
| 18 | #include <asm/setup.h> |
| 19 | #include <asm/traps.h> |
| 20 | #include <asm/system.h> |
| 21 | #include <asm/uaccess.h> |
| 22 | #include <asm/page.h> |
| 23 | #include <asm/pgtable.h> |
| 24 | #include <asm/sun3mmu.h> |
| 25 | #include <asm/segment.h> |
| 26 | #include <asm/oplib.h> |
| 27 | #include <asm/mmu_context.h> |
| 28 | #include <asm/dvma.h> |
| 29 | |
| 30 | extern void prom_reboot (char *) __attribute__ ((__noreturn__)); |
| 31 | |
| 32 | #undef DEBUG_MMU_EMU |
| 33 | #define DEBUG_PROM_MAPS |
| 34 | |
| 35 | /* |
| 36 | ** Defines |
| 37 | */ |
| 38 | |
| 39 | #define CONTEXTS_NUM 8 |
| 40 | #define SEGMAPS_PER_CONTEXT_NUM 2048 |
| 41 | #define PAGES_PER_SEGMENT 16 |
| 42 | #define PMEGS_NUM 256 |
| 43 | #define PMEG_MASK 0xFF |
| 44 | |
| 45 | /* |
| 46 | ** Globals |
| 47 | */ |
| 48 | |
| 49 | unsigned long vmalloc_end; |
| 50 | EXPORT_SYMBOL(vmalloc_end); |
| 51 | |
| 52 | unsigned long pmeg_vaddr[PMEGS_NUM]; |
| 53 | unsigned char pmeg_alloc[PMEGS_NUM]; |
| 54 | unsigned char pmeg_ctx[PMEGS_NUM]; |
| 55 | |
| 56 | /* pointers to the mm structs for each task in each |
| 57 | context. 0xffffffff is a marker for kernel context */ |
| 58 | struct mm_struct *ctx_alloc[CONTEXTS_NUM] = { |
| 59 | [0] = (struct mm_struct *)0xffffffff |
| 60 | }; |
| 61 | |
| 62 | /* number of contexts still free for allocation; context 0 belongs to the kernel, hence CONTEXTS_NUM-1 */ |
| 63 | static unsigned char ctx_avail = CONTEXTS_NUM-1; |
| 64 | |
| 65 | /* array of pages to be marked off for the rom when we do mem_init later */ |
| 66 | /* 256 pages lets the rom take up to 2mb of physical ram.. I really |
| 67 | hope it never wants more than that. */ |
| 68 | unsigned long rom_pages[256]; |
| 69 | |
| 70 | /* Print a PTE value in symbolic form. For debugging. */ |
| 71 | void print_pte (pte_t pte) |
| 72 | { |
| 73 | #if 0 |
| 74 | /* Verbose version. */ |
| 75 | unsigned long val = pte_val (pte); |
| 76 | printk (" pte=%lx [addr=%lx", |
| 77 | val, (val & SUN3_PAGE_PGNUM_MASK) << PAGE_SHIFT); |
| 78 | if (val & SUN3_PAGE_VALID) printk (" valid"); |
| 79 | if (val & SUN3_PAGE_WRITEABLE) printk (" write"); |
| 80 | if (val & SUN3_PAGE_SYSTEM) printk (" sys"); |
| 81 | if (val & SUN3_PAGE_NOCACHE) printk (" nocache"); |
| 82 | if (val & SUN3_PAGE_ACCESSED) printk (" accessed"); |
| 83 | if (val & SUN3_PAGE_MODIFIED) printk (" modified"); |
| 84 | switch (val & SUN3_PAGE_TYPE_MASK) { |
| 85 | case SUN3_PAGE_TYPE_MEMORY: printk (" memory"); break; |
| 86 | case SUN3_PAGE_TYPE_IO: printk (" io"); break; |
| 87 | case SUN3_PAGE_TYPE_VME16: printk (" vme16"); break; |
| 88 | case SUN3_PAGE_TYPE_VME32: printk (" vme32"); break; |
| 89 | } |
| 90 | printk ("]\n"); |
| 91 | #else |
| 92 | /* Terse version. More likely to fit on a line. */ |
| 93 | unsigned long val = pte_val (pte); |
| 94 | char flags[7], *type; |
| 95 | |
| 96 | flags[0] = (val & SUN3_PAGE_VALID) ? 'v' : '-'; |
| 97 | flags[1] = (val & SUN3_PAGE_WRITEABLE) ? 'w' : '-'; |
| 98 | flags[2] = (val & SUN3_PAGE_SYSTEM) ? 's' : '-'; |
| 99 | flags[3] = (val & SUN3_PAGE_NOCACHE) ? 'x' : '-'; |
| 100 | flags[4] = (val & SUN3_PAGE_ACCESSED) ? 'a' : '-'; |
| 101 | flags[5] = (val & SUN3_PAGE_MODIFIED) ? 'm' : '-'; |
| 102 | flags[6] = '\0'; |
| 103 | |
| 104 | switch (val & SUN3_PAGE_TYPE_MASK) { |
| 105 | case SUN3_PAGE_TYPE_MEMORY: type = "memory"; break; |
| 106 | case SUN3_PAGE_TYPE_IO: type = "io" ; break; |
| 107 | case SUN3_PAGE_TYPE_VME16: type = "vme16" ; break; |
| 108 | case SUN3_PAGE_TYPE_VME32: type = "vme32" ; break; |
| 109 | default: type = "unknown?"; break; |
| 110 | } |
| 111 | |
| 112 | printk (" pte=%08lx [%07lx %s %s]\n", |
| 113 | val, (val & SUN3_PAGE_PGNUM_MASK) << PAGE_SHIFT, flags, type); |
| 114 | #endif |
| 115 | } |
| 116 | |
| 117 | /* Print the PTE value for a given virtual address. For debugging. */ |
| 118 | void print_pte_vaddr (unsigned long vaddr) |
| 119 | { |
| 120 | printk (" vaddr=%lx [%02lx]", vaddr, sun3_get_segmap (vaddr)); |
| 121 | print_pte (__pte (sun3_get_pte (vaddr))); |
| 122 | } |
| 123 | |
| 124 | /* |
| 125 | * Initialise the MMU emulator. |
| 126 | */ |
| 127 | void mmu_emu_init(unsigned long bootmem_end) |
| 128 | { |
| 129 | unsigned long seg, num; |
| 130 | int i,j; |
| 131 | |
| 132 | memset(rom_pages, 0, sizeof(rom_pages)); |
| 133 | memset(pmeg_vaddr, 0, sizeof(pmeg_vaddr)); |
| 134 | memset(pmeg_alloc, 0, sizeof(pmeg_alloc)); |
| 135 | memset(pmeg_ctx, 0, sizeof(pmeg_ctx)); |
| 136 | |
| 137 | /* pmeg align the end of bootmem, adding another pmeg, |
| 138 | * later bootmem allocations will likely need it */ |
| 139 | bootmem_end = (bootmem_end + (2 * SUN3_PMEG_SIZE)) & ~SUN3_PMEG_MASK; |
| 140 | |
| 141 | /* mark all of the pmegs used thus far as reserved */ |
| 142 | for (i=0; i < __pa(bootmem_end) / SUN3_PMEG_SIZE ; ++i) |
| 143 | pmeg_alloc[i] = 2; |
| 144 | |
| 145 | |
| 146 | /* I'm thinking that most of the top pmeg's are going to be |
| 147 | used for something, and we probably shouldn't risk it */ |
| 148 | for(num = 0xf0; num <= 0xff; num++) |
| 149 | pmeg_alloc[num] = 2; |
| 150 | |
| 151 | /* liberate all existing mappings in the rest of kernel space */ |
| 152 | for(seg = bootmem_end; seg < 0x0f800000; seg += SUN3_PMEG_SIZE) { |
| 153 | i = sun3_get_segmap(seg); |
| 154 | |
| 155 | if(!pmeg_alloc[i]) { |
| 156 | #ifdef DEBUG_MMU_EMU |
| 157 | printk("freed: "); |
| 158 | print_pte_vaddr (seg); |
| 159 | #endif |
| 160 | sun3_put_segmap(seg, SUN3_INVALID_PMEG); |
| 161 | } |
| 162 | } |
| 163 | |
| 164 | j = 0; |
| 165 | for (num=0, seg=0x0F800000; seg<0x10000000; seg+=16*PAGE_SIZE) { |
| 166 | if (sun3_get_segmap (seg) != SUN3_INVALID_PMEG) { |
| 167 | #ifdef DEBUG_PROM_MAPS |
| 168 | for(i = 0; i < 16; i++) { |
| 169 | printk ("mapped:"); |
| 170 | print_pte_vaddr (seg + (i*PAGE_SIZE)); |
| 171 | break; |
| 172 | } |
| 173 | #endif |
| 174 | // the lowest mapping here is the end of our |
| 175 | // vmalloc region |
| 176 | if(!vmalloc_end) |
| 177 | vmalloc_end = seg; |
| 178 | |
| 179 | // mark the segmap alloc'd, and reserve any |
| 180 | // of the first 0xbff pages the hardware is |
| 181 | // already using... does any sun3 support > 24mb? |
| 182 | pmeg_alloc[sun3_get_segmap(seg)] = 2; |
| 183 | } |
| 184 | } |
| 185 | |
| 186 | dvma_init(); |
| 187 | |
| 188 | |
| 189 | /* blank everything below the kernel, and we've got the base |
| 190 | mapping to start all the contexts off with... */ |
| 191 | for(seg = 0; seg < PAGE_OFFSET; seg += SUN3_PMEG_SIZE) |
| 192 | sun3_put_segmap(seg, SUN3_INVALID_PMEG); |
| 193 | |
| 194 | set_fs(MAKE_MM_SEG(3)); |
| 195 | for(seg = 0; seg < 0x10000000; seg += SUN3_PMEG_SIZE) { |
| 196 | i = sun3_get_segmap(seg); |
| 197 | for(j = 1; j < CONTEXTS_NUM; j++) |
| 198 | (*(romvec->pv_setctxt))(j, (void *)seg, i); |
| 199 | } |
| 200 | set_fs(KERNEL_DS); |
| 201 | |
| 202 | } |
| 203 | |
| 204 | /* erase the mappings for a dead context. Uses the pg_dir for hints |
| 205 | as the pmeg tables proved somewhat unreliable, and unmapping all of |
| 206 | TASK_SIZE was much slower and no more stable. */ |
| 207 | /* todo: find a better way to keep track of the pmegs used by a |
| 208 | context for when they're cleared */ |
| 209 | void clear_context(unsigned long context) |
| 210 | { |
| 211 | unsigned char oldctx; |
| 212 | unsigned long i; |
| 213 | |
| 214 | if(context) { |
| 215 | if(!ctx_alloc[context]) |
| 216 | panic("clear_context: context not allocated\n"); |
| 217 | |
| 218 | ctx_alloc[context]->context = SUN3_INVALID_CONTEXT; |
| 219 | ctx_alloc[context] = (struct mm_struct *)0; |
| 220 | ctx_avail++; |
| 221 | } |
| 222 | |
| 223 | oldctx = sun3_get_context(); |
| 224 | |
| 225 | sun3_put_context(context); |
| 226 | |
| 227 | for(i = 0; i < SUN3_INVALID_PMEG; i++) { |
| 228 | if((pmeg_ctx[i] == context) && (pmeg_alloc[i] == 1)) { |
| 229 | sun3_put_segmap(pmeg_vaddr[i], SUN3_INVALID_PMEG); |
| 230 | pmeg_ctx[i] = 0; |
| 231 | pmeg_alloc[i] = 0; |
| 232 | pmeg_vaddr[i] = 0; |
| 233 | } |
| 234 | } |
| 235 | |
| 236 | sun3_put_context(oldctx); |
| 237 | } |
| 238 | |
| 239 | /* gets an empty context. if full, kills the next context listed to |
| 240 | die first */ |
| 241 | /* This context invalidation scheme is, well, totally arbitrary, I'm |
| 242 | sure it could be much more intellegent... but it gets the job done |
| 243 | for now without much overhead in making it's decision. */ |
| 244 | /* todo: come up with optimized scheme for flushing contexts */ |
| 245 | unsigned long get_free_context(struct mm_struct *mm) |
| 246 | { |
| 247 | unsigned long new = 1; |
| 248 | static unsigned char next_to_die = 1; |
| 249 | |
| 250 | if(!ctx_avail) { |
| 251 | /* kill someone to get our context */ |
| 252 | new = next_to_die; |
| 253 | clear_context(new); |
| 254 | next_to_die = (next_to_die + 1) & 0x7; |
| 255 | if(!next_to_die) |
| 256 | next_to_die++; |
| 257 | } else { |
| 258 | while(new < CONTEXTS_NUM) { |
| 259 | if(ctx_alloc[new]) |
| 260 | new++; |
| 261 | else |
| 262 | break; |
| 263 | } |
| 264 | // check to make sure one was really free... |
| 265 | if(new == CONTEXTS_NUM) |
| 266 | panic("get_free_context: failed to find free context"); |
| 267 | } |
| 268 | |
| 269 | ctx_alloc[new] = mm; |
| 270 | ctx_avail--; |
| 271 | |
| 272 | return new; |
| 273 | } |
| 274 | |
| 275 | /* |
| 276 | * Dynamically select a `spare' PMEG and use it to map virtual `vaddr' in |
| 277 | * `context'. Maintain internal PMEG management structures. This doesn't |
| 278 | * actually map the physical address, but does clear the old mappings. |
| 279 | */ |
| 280 | //todo: better allocation scheme? but is extra complexity worthwhile? |
| 281 | //todo: only clear old entries if necessary? how to tell? |
| 282 | |
| 283 | inline void mmu_emu_map_pmeg (int context, int vaddr) |
| 284 | { |
| 285 | static unsigned char curr_pmeg = 128; |
| 286 | int i; |
| 287 | |
| 288 | /* Round address to PMEG boundary. */ |
| 289 | vaddr &= ~SUN3_PMEG_MASK; |
| 290 | |
| 291 | /* Find a spare one. */ |
| 292 | while (pmeg_alloc[curr_pmeg] == 2) |
| 293 | ++curr_pmeg; |
| 294 | |
| 295 | |
| 296 | #ifdef DEBUG_MMU_EMU |
| 297 | printk("mmu_emu_map_pmeg: pmeg %x to context %d vaddr %x\n", |
| 298 | curr_pmeg, context, vaddr); |
| 299 | #endif |
| 300 | |
| 301 | /* Invalidate old mapping for the pmeg, if any */ |
| 302 | if (pmeg_alloc[curr_pmeg] == 1) { |
| 303 | sun3_put_context(pmeg_ctx[curr_pmeg]); |
| 304 | sun3_put_segmap (pmeg_vaddr[curr_pmeg], SUN3_INVALID_PMEG); |
| 305 | sun3_put_context(context); |
| 306 | } |
| 307 | |
| 308 | /* Update PMEG management structures. */ |
| 309 | // don't take pmeg's away from the kernel... |
| 310 | if(vaddr >= PAGE_OFFSET) { |
| 311 | /* map kernel pmegs into all contexts */ |
| 312 | unsigned char i; |
| 313 | |
| 314 | for(i = 0; i < CONTEXTS_NUM; i++) { |
| 315 | sun3_put_context(i); |
| 316 | sun3_put_segmap (vaddr, curr_pmeg); |
| 317 | } |
| 318 | sun3_put_context(context); |
| 319 | pmeg_alloc[curr_pmeg] = 2; |
| 320 | pmeg_ctx[curr_pmeg] = 0; |
| 321 | |
| 322 | } |
| 323 | else { |
| 324 | pmeg_alloc[curr_pmeg] = 1; |
| 325 | pmeg_ctx[curr_pmeg] = context; |
| 326 | sun3_put_segmap (vaddr, curr_pmeg); |
| 327 | |
| 328 | } |
| 329 | pmeg_vaddr[curr_pmeg] = vaddr; |
| 330 | |
| 331 | /* Set hardware mapping and clear the old PTE entries. */ |
| 332 | for (i=0; i<SUN3_PMEG_SIZE; i+=SUN3_PTE_SIZE) |
| 333 | sun3_put_pte (vaddr + i, SUN3_PAGE_SYSTEM); |
| 334 | |
| 335 | /* Consider a different one next time. */ |
| 336 | ++curr_pmeg; |
| 337 | } |
| 338 | |
| 339 | /* |
| 340 | * Handle a pagefault at virtual address `vaddr'; check if there should be a |
| 341 | * page there (specifically, whether the software pagetables indicate that |
| 342 | * there is). This is necessary due to the limited size of the second-level |
| 343 | * Sun3 hardware pagetables (256 groups of 16 pages). If there should be a |
| 344 | * mapping present, we select a `spare' PMEG and use it to create a mapping. |
| 345 | * `read_flag' is nonzero for a read fault; zero for a write. Returns nonzero |
| 346 | * if we successfully handled the fault. |
| 347 | */ |
| 348 | //todo: should we bump minor pagefault counter? if so, here or in caller? |
| 349 | //todo: possibly inline this into bus_error030 in <asm/buserror.h> ? |
| 350 | |
| 351 | // kernel_fault is set when a kernel page couldn't be demand mapped, |
| 352 | // and forces another try using the kernel page table. basically a |
| 353 | // hack so that vmalloc would work correctly. |
| 354 | |
| 355 | int mmu_emu_handle_fault (unsigned long vaddr, int read_flag, int kernel_fault) |
| 356 | { |
| 357 | unsigned long segment, offset; |
| 358 | unsigned char context; |
| 359 | pte_t *pte; |
| 360 | pgd_t * crp; |
| 361 | |
| 362 | if(current->mm == NULL) { |
| 363 | crp = swapper_pg_dir; |
| 364 | context = 0; |
| 365 | } else { |
| 366 | context = current->mm->context; |
| 367 | if(kernel_fault) |
| 368 | crp = swapper_pg_dir; |
| 369 | else |
| 370 | crp = current->mm->pgd; |
| 371 | } |
| 372 | |
| 373 | #ifdef DEBUG_MMU_EMU |
| 374 | printk ("mmu_emu_handle_fault: vaddr=%lx type=%s crp=%p\n", |
| 375 | vaddr, read_flag ? "read" : "write", crp); |
| 376 | #endif |
| 377 | |
| 378 | segment = (vaddr >> SUN3_PMEG_SIZE_BITS) & 0x7FF; |
| 379 | offset = (vaddr >> SUN3_PTE_SIZE_BITS) & 0xF; |
| 380 | |
| 381 | #ifdef DEBUG_MMU_EMU |
| 382 | printk ("mmu_emu_handle_fault: segment=%lx offset=%lx\n", segment, offset); |
| 383 | #endif |
| 384 | |
| 385 | pte = (pte_t *) pgd_val (*(crp + segment)); |
| 386 | |
| 387 | //todo: next line should check for valid pmd properly. |
| 388 | if (!pte) { |
| 389 | // printk ("mmu_emu_handle_fault: invalid pmd\n"); |
| 390 | return 0; |
| 391 | } |
| 392 | |
| 393 | pte = (pte_t *) __va ((unsigned long)(pte + offset)); |
| 394 | |
| 395 | /* Make sure this is a valid page */ |
| 396 | if (!(pte_val (*pte) & SUN3_PAGE_VALID)) |
| 397 | return 0; |
| 398 | |
| 399 | /* Make sure there's a pmeg allocated for the page */ |
| 400 | if (sun3_get_segmap (vaddr&~SUN3_PMEG_MASK) == SUN3_INVALID_PMEG) |
| 401 | mmu_emu_map_pmeg (context, vaddr); |
| 402 | |
| 403 | /* Write the pte value to hardware MMU */ |
| 404 | sun3_put_pte (vaddr&PAGE_MASK, pte_val (*pte)); |
| 405 | |
| 406 | /* Update software copy of the pte value */ |
| 407 | // I'm not sure this is necessary. If this is required, we ought to simply |
| 408 | // copy this out when we reuse the PMEG or at some other convenient time. |
| 409 | // Doing it here is fairly meaningless, anyway, as we only know about the |
| 410 | // first access to a given page. --m |
| 411 | if (!read_flag) { |
| 412 | if (pte_val (*pte) & SUN3_PAGE_WRITEABLE) |
| 413 | pte_val (*pte) |= (SUN3_PAGE_ACCESSED |
| 414 | | SUN3_PAGE_MODIFIED); |
| 415 | else |
| 416 | return 0; /* Write-protect error. */ |
| 417 | } else |
| 418 | pte_val (*pte) |= SUN3_PAGE_ACCESSED; |
| 419 | |
| 420 | #ifdef DEBUG_MMU_EMU |
| 421 | printk ("seg:%d crp:%p ->", get_fs().seg, crp); |
| 422 | print_pte_vaddr (vaddr); |
| 423 | printk ("\n"); |
| 424 | #endif |
| 425 | |
| 426 | return 1; |
| 427 | } |