David Gibson | 8d2169e | 2007-04-27 11:53:52 +1000 | [diff] [blame] | 1 | #ifndef _ASM_POWERPC_MMU_HASH64_H_ |
| 2 | #define _ASM_POWERPC_MMU_HASH64_H_ |
| 3 | /* |
| 4 | * PowerPC64 memory management structures |
| 5 | * |
| 6 | * Dave Engebretsen & Mike Corrigan <{engebret|mikejc}@us.ibm.com> |
| 7 | * PPC64 rework. |
| 8 | * |
| 9 | * This program is free software; you can redistribute it and/or |
| 10 | * modify it under the terms of the GNU General Public License |
| 11 | * as published by the Free Software Foundation; either version |
| 12 | * 2 of the License, or (at your option) any later version. |
| 13 | */ |
| 14 | |
| 15 | #include <asm/asm-compat.h> |
| 16 | #include <asm/page.h> |
| 17 | |
| 18 | /* |
| 19 | * Segment table |
| 20 | */ |
| 21 | |
| 22 | #define STE_ESID_V 0x80 |
| 23 | #define STE_ESID_KS 0x20 |
| 24 | #define STE_ESID_KP 0x10 |
| 25 | #define STE_ESID_N 0x08 |
| 26 | |
| 27 | #define STE_VSID_SHIFT 12 |
| 28 | |
| 29 | /* Location of cpu0's segment table */ |
| 30 | #define STAB0_PAGE 0x6 |
| 31 | #define STAB0_OFFSET (STAB0_PAGE << 12) |
| 32 | #define STAB0_PHYS_ADDR (STAB0_OFFSET + PHYSICAL_START) |
| 33 | |
| 34 | #ifndef __ASSEMBLY__ |
| 35 | extern char initial_stab[]; |
| 36 | #endif /* ! __ASSEMBLY */ |
| 37 | |
| 38 | /* |
| 39 | * SLB |
| 40 | */ |
| 41 | |
| 42 | #define SLB_NUM_BOLTED 3 |
| 43 | #define SLB_CACHE_ENTRIES 8 |
| 44 | |
| 45 | /* Bits in the SLB ESID word */ |
| 46 | #define SLB_ESID_V ASM_CONST(0x0000000008000000) /* valid */ |
| 47 | |
| 48 | /* Bits in the SLB VSID word */ |
| 49 | #define SLB_VSID_SHIFT 12 |
| 50 | #define SLB_VSID_B ASM_CONST(0xc000000000000000) |
| 51 | #define SLB_VSID_B_256M ASM_CONST(0x0000000000000000) |
| 52 | #define SLB_VSID_B_1T ASM_CONST(0x4000000000000000) |
| 53 | #define SLB_VSID_KS ASM_CONST(0x0000000000000800) |
| 54 | #define SLB_VSID_KP ASM_CONST(0x0000000000000400) |
| 55 | #define SLB_VSID_N ASM_CONST(0x0000000000000200) /* no-execute */ |
| 56 | #define SLB_VSID_L ASM_CONST(0x0000000000000100) |
| 57 | #define SLB_VSID_C ASM_CONST(0x0000000000000080) /* class */ |
| 58 | #define SLB_VSID_LP ASM_CONST(0x0000000000000030) |
| 59 | #define SLB_VSID_LP_00 ASM_CONST(0x0000000000000000) |
| 60 | #define SLB_VSID_LP_01 ASM_CONST(0x0000000000000010) |
| 61 | #define SLB_VSID_LP_10 ASM_CONST(0x0000000000000020) |
| 62 | #define SLB_VSID_LP_11 ASM_CONST(0x0000000000000030) |
| 63 | #define SLB_VSID_LLP (SLB_VSID_L|SLB_VSID_LP) |
| 64 | |
| 65 | #define SLB_VSID_KERNEL (SLB_VSID_KP) |
| 66 | #define SLB_VSID_USER (SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C) |
| 67 | |
| 68 | #define SLBIE_C (0x08000000) |
| 69 | |
| 70 | /* |
| 71 | * Hash table |
| 72 | */ |
| 73 | |
| 74 | #define HPTES_PER_GROUP 8 |
| 75 | |
Paul Mackerras | 2454c7e | 2007-05-10 15:28:44 +1000 | [diff] [blame] | 76 | #define HPTE_V_SSIZE_SHIFT 62 |
David Gibson | 8d2169e | 2007-04-27 11:53:52 +1000 | [diff] [blame] | 77 | #define HPTE_V_AVPN_SHIFT 7 |
Paul Mackerras | 2454c7e | 2007-05-10 15:28:44 +1000 | [diff] [blame] | 78 | #define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80) |
David Gibson | 8d2169e | 2007-04-27 11:53:52 +1000 | [diff] [blame] | 79 | #define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT) |
| 80 | #define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & HPTE_V_AVPN)) |
| 81 | #define HPTE_V_BOLTED ASM_CONST(0x0000000000000010) |
| 82 | #define HPTE_V_LOCK ASM_CONST(0x0000000000000008) |
| 83 | #define HPTE_V_LARGE ASM_CONST(0x0000000000000004) |
| 84 | #define HPTE_V_SECONDARY ASM_CONST(0x0000000000000002) |
| 85 | #define HPTE_V_VALID ASM_CONST(0x0000000000000001) |
| 86 | |
| 87 | #define HPTE_R_PP0 ASM_CONST(0x8000000000000000) |
| 88 | #define HPTE_R_TS ASM_CONST(0x4000000000000000) |
| 89 | #define HPTE_R_RPN_SHIFT 12 |
| 90 | #define HPTE_R_RPN ASM_CONST(0x3ffffffffffff000) |
| 91 | #define HPTE_R_FLAGS ASM_CONST(0x00000000000003ff) |
| 92 | #define HPTE_R_PP ASM_CONST(0x0000000000000003) |
| 93 | #define HPTE_R_N ASM_CONST(0x0000000000000004) |
| 94 | #define HPTE_R_C ASM_CONST(0x0000000000000080) |
| 95 | #define HPTE_R_R ASM_CONST(0x0000000000000100) |
| 96 | |
Sachin P. Sant | b7abc5c | 2007-06-14 15:31:34 +1000 | [diff] [blame] | 97 | #define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000) |
| 98 | #define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000) |
| 99 | |
David Gibson | 8d2169e | 2007-04-27 11:53:52 +1000 | [diff] [blame] | 100 | /* Values for PP (assumes Ks=0, Kp=1) */ |
| 101 | /* pp0 will always be 0 for linux */ |
| 102 | #define PP_RWXX 0 /* Supervisor read/write, User none */ |
| 103 | #define PP_RWRX 1 /* Supervisor read/write, User read */ |
| 104 | #define PP_RWRW 2 /* Supervisor read/write, User read/write */ |
| 105 | #define PP_RXRX 3 /* Supervisor read, User read */ |
| 106 | |
| 107 | #ifndef __ASSEMBLY__ |
| 108 | |
David Gibson | 8e561e7 | 2007-06-13 14:52:56 +1000 | [diff] [blame] | 109 | struct hash_pte { |
David Gibson | 8d2169e | 2007-04-27 11:53:52 +1000 | [diff] [blame] | 110 | unsigned long v; |
| 111 | unsigned long r; |
David Gibson | 8e561e7 | 2007-06-13 14:52:56 +1000 | [diff] [blame] | 112 | }; |
David Gibson | 8d2169e | 2007-04-27 11:53:52 +1000 | [diff] [blame] | 113 | |
David Gibson | 8e561e7 | 2007-06-13 14:52:56 +1000 | [diff] [blame] | 114 | extern struct hash_pte *htab_address; |
David Gibson | 8d2169e | 2007-04-27 11:53:52 +1000 | [diff] [blame] | 115 | extern unsigned long htab_size_bytes; |
| 116 | extern unsigned long htab_hash_mask; |
| 117 | |
| 118 | /* |
| 119 | * Page size definition |
| 120 | * |
| 121 | * shift : is the "PAGE_SHIFT" value for that page size |
| 122 | * sllp : is a bit mask with the value of SLB L || LP to be or'ed |
| 123 | * directly to a slbmte "vsid" value |
| 124 | * penc : is the HPTE encoding mask for the "LP" field: |
| 125 | * |
| 126 | */ |
| 127 | struct mmu_psize_def |
| 128 | { |
| 129 | unsigned int shift; /* number of bits */ |
| 130 | unsigned int penc; /* HPTE encoding */ |
| 131 | unsigned int tlbiel; /* tlbiel supported for that page size */ |
| 132 | unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */ |
| 133 | unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */ |
| 134 | }; |
| 135 | |
| 136 | #endif /* __ASSEMBLY__ */ |
| 137 | |
| 138 | /* |
| 139 | * The kernel uses the constants below to index into the page sizes array. |
| 140 | * Using fixed constants for this purpose is better for the performance |
| 141 | * of the low level hash refill handlers. |
| 142 | * |
| 143 | * A non supported page size has a "shift" field set to 0 |
| 144 | * |
| 145 | * Any new page size being implemented can get a new entry in here. Whether |
| 146 | * the kernel will use it or not is a different matter though. The actual page |
| 147 | * size used by hugetlbfs is not defined here and may be made variable |
| 148 | */ |
| 149 | |
| 150 | #define MMU_PAGE_4K 0 /* 4K */ |
| 151 | #define MMU_PAGE_64K 1 /* 64K */ |
| 152 | #define MMU_PAGE_64K_AP 2 /* 64K Admixed (in a 4K segment) */ |
| 153 | #define MMU_PAGE_1M 3 /* 1M */ |
| 154 | #define MMU_PAGE_16M 4 /* 16M */ |
| 155 | #define MMU_PAGE_16G 5 /* 16G */ |
| 156 | #define MMU_PAGE_COUNT 6 |
| 157 | |
Paul Mackerras | 2454c7e | 2007-05-10 15:28:44 +1000 | [diff] [blame] | 158 | /* |
| 159 | * Segment sizes. |
| 160 | * These are the values used by hardware in the B field of |
| 161 | * SLB entries and the first dword of MMU hashtable entries. |
| 162 | * The B field is 2 bits; the values 2 and 3 are unused and reserved. |
| 163 | */ |
| 164 | #define MMU_SEGSIZE_256M 0 |
| 165 | #define MMU_SEGSIZE_1T 1 |
| 166 | |
David Gibson | 8d2169e | 2007-04-27 11:53:52 +1000 | [diff] [blame] | 167 | #ifndef __ASSEMBLY__ |
| 168 | |
| 169 | /* |
| 170 | * The current system page sizes |
| 171 | */ |
| 172 | extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; |
| 173 | extern int mmu_linear_psize; |
| 174 | extern int mmu_virtual_psize; |
| 175 | extern int mmu_vmalloc_psize; |
| 176 | extern int mmu_io_psize; |
| 177 | |
| 178 | /* |
| 179 | * If the processor supports 64k normal pages but not 64k cache |
| 180 | * inhibited pages, we have to be prepared to switch processes |
| 181 | * to use 4k pages when they create cache-inhibited mappings. |
| 182 | * If this is the case, mmu_ci_restrictions will be set to 1. |
| 183 | */ |
| 184 | extern int mmu_ci_restrictions; |
| 185 | |
| 186 | #ifdef CONFIG_HUGETLB_PAGE |
| 187 | /* |
| 188 | * The page size index of the huge pages for use by hugetlbfs |
| 189 | */ |
| 190 | extern int mmu_huge_psize; |
| 191 | |
| 192 | #endif /* CONFIG_HUGETLB_PAGE */ |
| 193 | |
| 194 | /* |
| 195 | * This function sets the AVPN and L fields of the HPTE appropriately |
| 196 | * for the page size |
| 197 | */ |
| 198 | static inline unsigned long hpte_encode_v(unsigned long va, int psize) |
| 199 | { |
| 200 | unsigned long v = |
| 201 | v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm); |
| 202 | v <<= HPTE_V_AVPN_SHIFT; |
| 203 | if (psize != MMU_PAGE_4K) |
| 204 | v |= HPTE_V_LARGE; |
| 205 | return v; |
| 206 | } |
| 207 | |
| 208 | /* |
| 209 | * This function sets the ARPN, and LP fields of the HPTE appropriately |
| 210 | * for the page size. We assume the pa is already "clean" that is properly |
| 211 | * aligned for the requested page size |
| 212 | */ |
| 213 | static inline unsigned long hpte_encode_r(unsigned long pa, int psize) |
| 214 | { |
| 215 | unsigned long r; |
| 216 | |
| 217 | /* A 4K page needs no special encoding */ |
| 218 | if (psize == MMU_PAGE_4K) |
| 219 | return pa & HPTE_R_RPN; |
| 220 | else { |
| 221 | unsigned int penc = mmu_psize_defs[psize].penc; |
| 222 | unsigned int shift = mmu_psize_defs[psize].shift; |
| 223 | return (pa & ~((1ul << shift) - 1)) | (penc << 12); |
| 224 | } |
| 225 | return r; |
| 226 | } |
| 227 | |
/*
 * Hash a virtual address.  This handles 256MB segments only for now.
 *
 * va    : virtual address to hash
 * shift : right shift applied to the offset within the segment
 *
 * The result is the XOR of two values: bits 28 and up of va (limited to
 * 39 bits), and the low 28 bits of va shifted right by 'shift'.
 */
static inline unsigned long hpt_hash(unsigned long va, unsigned int shift)
{
	unsigned long segment = (va >> 28) & 0x7fffffffffUL;
	unsigned long offset = va & 0x0fffffffUL;

	return segment ^ (offset >> shift);
}
| 236 | |
| 237 | extern int __hash_page_4K(unsigned long ea, unsigned long access, |
| 238 | unsigned long vsid, pte_t *ptep, unsigned long trap, |
| 239 | unsigned int local); |
| 240 | extern int __hash_page_64K(unsigned long ea, unsigned long access, |
| 241 | unsigned long vsid, pte_t *ptep, unsigned long trap, |
| 242 | unsigned int local); |
| 243 | struct mm_struct; |
| 244 | extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); |
| 245 | extern int hash_huge_page(struct mm_struct *mm, unsigned long access, |
| 246 | unsigned long ea, unsigned long vsid, int local, |
| 247 | unsigned long trap); |
| 248 | |
| 249 | extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, |
| 250 | unsigned long pstart, unsigned long mode, |
| 251 | int psize); |
| 252 | |
| 253 | extern void htab_initialize(void); |
| 254 | extern void htab_initialize_secondary(void); |
| 255 | extern void hpte_init_native(void); |
| 256 | extern void hpte_init_lpar(void); |
| 257 | extern void hpte_init_iSeries(void); |
| 258 | extern void hpte_init_beat(void); |
| 259 | |
| 260 | extern void stabs_alloc(void); |
| 261 | extern void slb_initialize(void); |
| 262 | extern void slb_flush_and_rebolt(void); |
| 263 | extern void stab_initialize(unsigned long stab); |
| 264 | |
| 265 | #endif /* __ASSEMBLY__ */ |
| 266 | |
| 267 | /* |
| 268 | * VSID allocation |
| 269 | * |
| 270 | * We first generate a 36-bit "proto-VSID". For kernel addresses this |
| 271 | * is equal to the ESID, for user addresses it is: |
| 272 | * (context << USER_ESID_BITS) | (esid & ((1UL << USER_ESID_BITS) - 1)) |
| 273 | * |
| 274 | * The two forms are distinguishable because the top bit is 0 for user |
| 275 | * addresses, whereas the top two bits are 1 for kernel addresses. |
| 276 | * Proto-VSIDs with the top two bits equal to 0b10 are reserved for |
| 277 | * now. |
| 278 | * |
| 279 | * The proto-VSIDs are then scrambled into real VSIDs with the |
| 280 | * multiplicative hash: |
| 281 | * |
| 282 | * VSID = (proto-VSID * VSID_MULTIPLIER) % VSID_MODULUS |
| 283 | * where VSID_MULTIPLIER = 200730139 = 0xBF6E61B |
| 284 | * VSID_MODULUS = 2^36-1 = 0xFFFFFFFFF |
| 285 | * |
| 286 | * This scramble is only well defined for proto-VSIDs below |
| 287 | * 0xFFFFFFFFF, so both proto-VSID and actual VSID 0xFFFFFFFFF are |
| 288 | * reserved. VSID_MULTIPLIER is prime, so in particular it is |
| 289 | * co-prime to VSID_MODULUS, making this a 1:1 scrambling function. |
| 290 | * Because the modulus is 2^n-1 we can compute it efficiently without |
| 291 | * a divide or extra multiply (see below). |
| 292 | * |
| 293 | * This scheme has several advantages over older methods: |
| 294 | * |
| 295 | * - We have VSIDs allocated for every kernel address |
| 296 | * (i.e. everything above 0xC000000000000000), except the very top |
| 297 | * segment, which simplifies several things. |
| 298 | * |
| 299 | * - We allow for 16 significant bits of ESID and 19 bits of |
| 300 | * context for user addresses. i.e. 16T (44 bits) of address space for |
| 301 | * up to 512K contexts (although the page table structure and context |
| 302 | * allocation will need changes to take advantage of this). |
| 303 | * |
| 304 | * - The scramble function gives robust scattering in the hash |
| 305 | * table (at least based on some initial results). The previous |
| 306 | * method was more susceptible to pathological cases giving excessive |
| 307 | * hash collisions. |
| 308 | */ |
| 309 | /* |
| 310 | * WARNING - If you change these you must make sure the asm |
| 311 | * implementations in slb_allocate (slb_low.S), do_stab_bolted |
| 312 | * (head.S) and ASM_VSID_SCRAMBLE (below) are changed accordingly. |
| 313 | * |
| 314 | * You'll also need to change the precomputed VSID values in head.S |
| 315 | * which are used by the iSeries firmware. |
| 316 | */ |
| 317 | |
| 318 | #define VSID_MULTIPLIER ASM_CONST(200730139) /* 28-bit prime */ |
| 319 | #define VSID_BITS 36 |
| 320 | #define VSID_MODULUS ((1UL<<VSID_BITS)-1) |
| 321 | |
| 322 | #define CONTEXT_BITS 19 |
| 323 | #define USER_ESID_BITS 16 |
| 324 | |
| 325 | #define USER_VSID_RANGE (1UL << (USER_ESID_BITS + SID_SHIFT)) |
| 326 | |
| 327 | /* |
| 328 | * This macro generates asm code to compute the VSID scramble |
| 329 | * function. Used in slb_allocate() and do_stab_bolted. The function |
| 330 | * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS |
| 331 | * |
| 332 | * rt = register containing the proto-VSID and into which the |
| 333 | * VSID will be stored |
| 334 | * rx = scratch register (clobbered) |
| 335 | * |
| 336 | * - rt and rx must be different registers |
| 337 | * - The answer will end up in the low 36 bits of rt. The higher |
| 338 | * bits may contain other garbage, so you may need to mask the |
| 339 | * result. |
| 340 | */ |
| 341 | #define ASM_VSID_SCRAMBLE(rt, rx) \ |
| 342 | lis rx,VSID_MULTIPLIER@h; \ |
| 343 | ori rx,rx,VSID_MULTIPLIER@l; \ |
| 344 | mulld rt,rt,rx; /* rt = rt * MULTIPLIER */ \ |
| 345 | \ |
| 346 | srdi rx,rt,VSID_BITS; \ |
| 347 | clrldi rt,rt,(64-VSID_BITS); \ |
| 348 | add rt,rt,rx; /* add high and low bits */ \ |
| 349 | /* Now, rt == VSID (mod 2^36-1), and lies between 0 and \ |
| 350 | * 2^36-1+2^28-1. That in particular means that if rt >= \ |
| 351 | * 2^36-1, then rt+1 has the 2^36 bit set. So, if rt+1 has \ |
| 352 | * the bit clear, rt already has the answer we want, if it \ |
| 353 | * doesn't, the answer is the low 36 bits of rt+1. So in all \ |
| 354 | * cases the answer is the low 36 bits of (rt + ((rt+1) >> 36))*/\ |
| 355 | addi rx,rt,1; \ |
| 356 | srdi rx,rx,VSID_BITS; /* extract 2^36 bit */ \ |
| 357 | add rt,rt,rx |
| 358 | |
| 359 | |
| 360 | #ifndef __ASSEMBLY__ |
| 361 | |
| 362 | typedef unsigned long mm_context_id_t; |
| 363 | |
| 364 | typedef struct { |
| 365 | mm_context_id_t id; |
Benjamin Herrenschmidt | d0f13e3 | 2007-05-08 16:27:27 +1000 | [diff] [blame] | 366 | u16 user_psize; /* page size index */ |
| 367 | |
| 368 | #ifdef CONFIG_PPC_MM_SLICES |
| 369 | u64 low_slices_psize; /* SLB page size encodings */ |
| 370 | u64 high_slices_psize; /* 4 bits per slice for now */ |
| 371 | #else |
| 372 | u16 sllp; /* SLB page size encoding */ |
David Gibson | 8d2169e | 2007-04-27 11:53:52 +1000 | [diff] [blame] | 373 | #endif |
| 374 | unsigned long vdso_base; |
| 375 | } mm_context_t; |
| 376 | |
| 377 | |
| 378 | static inline unsigned long vsid_scramble(unsigned long protovsid) |
| 379 | { |
| 380 | #if 0 |
| 381 | /* The code below is equivalent to this function for arguments |
| 382 | * < 2^VSID_BITS, which is all this should ever be called |
| 383 | * with. However gcc is not clever enough to compute the |
| 384 | * modulus (2^n-1) without a second multiply. */ |
| 385 | return ((protovsid * VSID_MULTIPLIER) % VSID_MODULUS); |
| 386 | #else /* 1 */ |
| 387 | unsigned long x; |
| 388 | |
| 389 | x = protovsid * VSID_MULTIPLIER; |
| 390 | x = (x >> VSID_BITS) + (x & VSID_MODULUS); |
| 391 | return (x + ((x+1) >> VSID_BITS)) & VSID_MODULUS; |
| 392 | #endif /* 1 */ |
| 393 | } |
| 394 | |
| 395 | /* This is only valid for addresses >= KERNELBASE */ |
| 396 | static inline unsigned long get_kernel_vsid(unsigned long ea) |
| 397 | { |
| 398 | return vsid_scramble(ea >> SID_SHIFT); |
| 399 | } |
| 400 | |
| 401 | /* This is only valid for user addresses (which are below 2^41) */ |
| 402 | static inline unsigned long get_vsid(unsigned long context, unsigned long ea) |
| 403 | { |
| 404 | return vsid_scramble((context << USER_ESID_BITS) |
| 405 | | (ea >> SID_SHIFT)); |
| 406 | } |
| 407 | |
| 408 | #define VSID_SCRAMBLE(pvsid) (((pvsid) * VSID_MULTIPLIER) % VSID_MODULUS) |
| 409 | #define KERNEL_VSID(ea) VSID_SCRAMBLE(GET_ESID(ea)) |
| 410 | |
| 411 | /* Physical address used by some IO functions */ |
| 412 | typedef unsigned long phys_addr_t; |
| 413 | |
| 414 | #endif /* __ASSEMBLY__ */ |
| 415 | |
| 416 | #endif /* _ASM_POWERPC_MMU_HASH64_H_ */ |