Aneesh Kumar K.V | ab537dc | 2015-12-01 09:06:30 +0530 | [diff] [blame] | 1 | #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H |
| 2 | #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H |
| 3 | |
| 4 | #include <asm-generic/pgtable-nopud.h> |
| 5 | |
/*
 * Number of index bits resolved at each page-table level. PUD_INDEX_SIZE is
 * 0: this header includes <asm-generic/pgtable-nopud.h>, so no separate PUD
 * level contributes index bits.
 */
#define PTE_INDEX_SIZE  8
#define PMD_INDEX_SIZE  10
#define PUD_INDEX_SIZE	0
#define PGD_INDEX_SIZE  12

/* Number of entries in a table at each level (2^index bits) */
#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
#define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
| 14 | |
/*
 * With 4k base page size, hugepage PTEs go at the PMD level
 *
 * NOTE(review): the sentence above talks about a 4k base page size, while
 * the include guard names the 64K hash configuration and the value used is
 * PAGE_SHIFT -- confirm the wording is still accurate for this header.
 */
#define MIN_HUGEPTE_SHIFT	PAGE_SHIFT

/* PMD_SHIFT determines what a second-level page table entry can map */
#define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
#define PMD_SIZE	(1UL << PMD_SHIFT)
/* Clears the offset-within-PMD bits of an address */
#define PMD_MASK	(~(PMD_SIZE-1))

/* PGDIR_SHIFT determines what a third-level page table entry can map */
#define PGDIR_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
/* Clears the offset-within-PGDIR bits of an address */
#define PGDIR_MASK	(~(PGDIR_SIZE-1))
| 27 | |
/* Software PTE bits specific to this configuration */
#define _PAGE_COMBO	0x00040000 /* this is a combo 4k page */
#define _PAGE_4K_PFN	0x00080000 /* PFN is for a single 4k page */
/*
 * Used to track subpage group valid if _PAGE_COMBO is set
 * This overloads _PAGE_F_GIX and _PAGE_F_SECOND
 */
#define _PAGE_COMBO_VALID	(_PAGE_F_GIX | _PAGE_F_SECOND)

/*
 * PTE flags to conserve for HPTE identification
 * (pmd_same() in this header ignores exactly these bits when comparing)
 */
#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_F_SECOND | \
			 _PAGE_F_GIX | _PAGE_HASHPTE | _PAGE_COMBO)
Benjamin Herrenschmidt | c605782 | 2009-03-10 17:53:29 +0000 | [diff] [blame] | 39 | |
/* Shift to put page number into pte.
 *
 * That gives us a max RPN of 37 bits, which means a max of 53 bits
 * of addressable physical space, or 49 bits for the special 4k PFNs.
 *
 * (Arithmetic, assuming a 16-bit page offset for 64K pages and a 12-bit
 * offset for 4k PFNs: 37 + 16 = 53 and 37 + 12 = 49.)
 */
#define PTE_RPN_SHIFT	(20)
/* Width of the RPN field in bits; remap_4k_pfn() range-checks against it */
#define PTE_RPN_SIZE	(37)
| 47 | |
/*
 * we support 16 fragments per PTE page of 64K size.
 * (PTE_FRAG_NR * PTE_FRAG_SIZE = 16 * 4K = 64K)
 */
#define PTE_FRAG_NR	16
/*
 * We use a 2K PTE page fragment and another 2K for storing
 * real_pte_t hash index
 * (hence a fragment is 4K in total: PTE_FRAG_SIZE_SHIFT == 12)
 */
#define PTE_FRAG_SIZE_SHIFT  12
#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)

/*
 * Bits to mask out from a PMD to get to the PTE page
 * PMDs point to PTE table fragments which are PTE_FRAG_SIZE aligned.
 */
#define PMD_MASKED_BITS		(PTE_FRAG_SIZE - 1)
/* Bits to mask out from a PGD/PUD to get to the PMD page */
#define PUD_MASKED_BITS		0x1ff
Benjamin Herrenschmidt | 3c726f8 | 2005-11-07 11:06:55 +1100 | [diff] [blame] | 66 | |
Stephen Rothwell | ee7a76d | 2007-09-18 17:22:59 +1000 | [diff] [blame] | 67 | #ifndef __ASSEMBLY__ |
Benjamin Herrenschmidt | 3c726f8 | 2005-11-07 11:06:55 +1100 | [diff] [blame] | 68 | |
Benjamin Herrenschmidt | c605782 | 2009-03-10 17:53:29 +0000 | [diff] [blame] | 69 | /* |
| 70 | * With 64K pages on hash table, we have a special PTE format that |
| 71 | * uses a second "half" of the page table to encode sub-page information |
| 72 | * in order to deal with 64K made of 4K HW pages. Thus we override the |
| 73 | * generic accessors and iterators here |
| 74 | */ |
Aneesh Kumar K.V | 85c1faf | 2014-08-13 12:32:03 +0530 | [diff] [blame] | 75 | #define __real_pte __real_pte |
| 76 | static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) |
| 77 | { |
| 78 | real_pte_t rpte; |
Aneesh Kumar K.V | 506b863 | 2015-12-01 09:06:46 +0530 | [diff] [blame] | 79 | unsigned long *hidxp; |
Aneesh Kumar K.V | 85c1faf | 2014-08-13 12:32:03 +0530 | [diff] [blame] | 80 | |
| 81 | rpte.pte = pte; |
| 82 | rpte.hidx = 0; |
| 83 | if (pte_val(pte) & _PAGE_COMBO) { |
| 84 | /* |
| 85 | * Make sure we order the hidx load against the _PAGE_COMBO |
| 86 | * check. The store side ordering is done in __hash_page_4K |
| 87 | */ |
| 88 | smp_rmb(); |
Aneesh Kumar K.V | 506b863 | 2015-12-01 09:06:46 +0530 | [diff] [blame] | 89 | hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); |
| 90 | rpte.hidx = *hidxp; |
Aneesh Kumar K.V | 85c1faf | 2014-08-13 12:32:03 +0530 | [diff] [blame] | 91 | } |
| 92 | return rpte; |
| 93 | } |
| 94 | |
| 95 | static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) |
| 96 | { |
| 97 | if ((pte_val(rpte.pte) & _PAGE_COMBO)) |
| 98 | return (rpte.hidx >> (index<<2)) & 0xf; |
Aneesh Kumar K.V | 4d9057c | 2015-12-01 09:06:56 +0530 | [diff] [blame] | 99 | return (pte_val(rpte.pte) >> _PAGE_F_GIX_SHIFT) & 0xf; |
Aneesh Kumar K.V | 85c1faf | 2014-08-13 12:32:03 +0530 | [diff] [blame] | 100 | } |
| 101 | |
/* Yield the pte member of a real_pte_t */
#define __rpte_to_pte(r)	((r).pte)
/*
 * Defined out of line. pte_iterate_hashed_subpages() calls it to decide
 * whether the subpage at index is visited when the page size is split.
 */
extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
Aneesh Kumar K.V | ab537dc | 2015-12-01 09:06:30 +0530 | [diff] [blame] | 104 | /* |
| 105 | * Trick: we set __end to va + 64k, which happens works for |
Benjamin Herrenschmidt | c605782 | 2009-03-10 17:53:29 +0000 | [diff] [blame] | 106 | * a 16M page as well as we want only one iteration |
| 107 | */ |
Aneesh Kumar K.V | 5524a27 | 2012-09-10 02:52:50 +0000 | [diff] [blame] | 108 | #define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift) \ |
| 109 | do { \ |
| 110 | unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT)); \ |
| 111 | unsigned __split = (psize == MMU_PAGE_4K || \ |
| 112 | psize == MMU_PAGE_64K_AP); \ |
| 113 | shift = mmu_psize_defs[psize].shift; \ |
| 114 | for (index = 0; vpn < __end; index++, \ |
| 115 | vpn += (1L << (shift - VPN_SHIFT))) { \ |
| 116 | if (!__split || __rpte_sub_valid(rpte, index)) \ |
| 117 | do { |
Benjamin Herrenschmidt | c605782 | 2009-03-10 17:53:29 +0000 | [diff] [blame] | 118 | |
| 119 | #define pte_iterate_hashed_end() } while(0); } } while(0) |
| 120 | |
/*
 * MMU page size index for a PTE value: MMU_PAGE_4K when _PAGE_COMBO is set
 * in pte, MMU_PAGE_64K otherwise. The mm and addr arguments are accepted
 * but not used by this definition.
 */
#define pte_pagesize_index(mm, addr, pte)	\
	(((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)
| 123 | |
/*
 * Remap PAGE_SIZE bytes at addr to pfn via remap_pfn_range(), with
 * _PAGE_4K_PFN or-ed into the protection bits.
 *
 * Evaluates to -EINVAL (after a WARN_ON) when pfn does not fit in
 * PTE_RPN_SIZE bits, otherwise to the result of remap_pfn_range().
 * Note that pfn is evaluated twice.
 */
#define remap_4k_pfn(vma, addr, pfn, prot)				\
	(WARN_ON(((pfn) >= (1UL << PTE_RPN_SIZE))) ? -EINVAL :	\
		remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE,	\
			__pgprot(pgprot_val((prot)) | _PAGE_4K_PFN)))
Benjamin Herrenschmidt | c605782 | 2009-03-10 17:53:29 +0000 | [diff] [blame] | 128 | |
/* A PTE table occupies exactly one PTE fragment */
#define PTE_TABLE_SIZE	PTE_FRAG_SIZE
/*
 * With THP the PMD table carries one extra unsigned long per entry after
 * the pmd_t array (get_hpte_slot_array() reads from that second half).
 */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define PMD_TABLE_SIZE	((sizeof(pmd_t) << PMD_INDEX_SIZE) + (sizeof(unsigned long) << PMD_INDEX_SIZE))
#else
#define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
#endif
#define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)

/*
 * Conversions between pgd_t and pte_t, routed through the pud helpers.
 * NOTE(review): pte_pgd() casts the result of pte_pud() to pgd_t; that is
 * only valid if both are scalar types in this configuration -- confirm.
 */
#define pgd_pte(pgd)	(pud_pte(((pud_t){ pgd })))
#define pte_pgd(pte)	((pgd_t)pte_pud(pte))
| 139 | |
Aneesh Kumar K.V | 26a344a | 2015-12-01 09:06:52 +0530 | [diff] [blame] | 140 | #ifdef CONFIG_HUGETLB_PAGE |
| 141 | /* |
| 142 | * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have |
| 143 | * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD; |
| 144 | * |
| 145 | * Defined in such a way that we can optimize away code block at build time |
| 146 | * if CONFIG_HUGETLB_PAGE=n. |
| 147 | */ |
| 148 | static inline int pmd_huge(pmd_t pmd) |
| 149 | { |
| 150 | /* |
Aneesh Kumar K.V | 6a119ea | 2015-12-01 09:06:54 +0530 | [diff] [blame] | 151 | * leaf pte for huge page |
Aneesh Kumar K.V | 26a344a | 2015-12-01 09:06:52 +0530 | [diff] [blame] | 152 | */ |
Aneesh Kumar K.V | 6a119ea | 2015-12-01 09:06:54 +0530 | [diff] [blame] | 153 | return !!(pmd_val(pmd) & _PAGE_PTE); |
Aneesh Kumar K.V | 26a344a | 2015-12-01 09:06:52 +0530 | [diff] [blame] | 154 | } |
| 155 | |
| 156 | static inline int pud_huge(pud_t pud) |
| 157 | { |
| 158 | /* |
Aneesh Kumar K.V | 6a119ea | 2015-12-01 09:06:54 +0530 | [diff] [blame] | 159 | * leaf pte for huge page |
Aneesh Kumar K.V | 26a344a | 2015-12-01 09:06:52 +0530 | [diff] [blame] | 160 | */ |
Aneesh Kumar K.V | 6a119ea | 2015-12-01 09:06:54 +0530 | [diff] [blame] | 161 | return !!(pud_val(pud) & _PAGE_PTE); |
Aneesh Kumar K.V | 26a344a | 2015-12-01 09:06:52 +0530 | [diff] [blame] | 162 | } |
| 163 | |
| 164 | static inline int pgd_huge(pgd_t pgd) |
| 165 | { |
| 166 | /* |
Aneesh Kumar K.V | 6a119ea | 2015-12-01 09:06:54 +0530 | [diff] [blame] | 167 | * leaf pte for huge page |
Aneesh Kumar K.V | 26a344a | 2015-12-01 09:06:52 +0530 | [diff] [blame] | 168 | */ |
Aneesh Kumar K.V | 6a119ea | 2015-12-01 09:06:54 +0530 | [diff] [blame] | 169 | return !!(pgd_val(pgd) & _PAGE_PTE); |
Aneesh Kumar K.V | 26a344a | 2015-12-01 09:06:52 +0530 | [diff] [blame] | 170 | } |
| 171 | #define pgd_huge pgd_huge |
| 172 | |
| 173 | #ifdef CONFIG_DEBUG_VM |
| 174 | extern int hugepd_ok(hugepd_t hpd); |
| 175 | #define is_hugepd(hpd) (hugepd_ok(hpd)) |
| 176 | #else |
| 177 | /* |
| 178 | * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't |
| 179 | * need to setup hugepage directory for them. Our pte and page directory format |
| 180 | * enable us to have this enabled. |
| 181 | */ |
| 182 | static inline int hugepd_ok(hugepd_t hpd) |
| 183 | { |
| 184 | return 0; |
| 185 | } |
| 186 | #define is_hugepd(pdep) 0 |
| 187 | #endif /* CONFIG_DEBUG_VM */ |
| 188 | |
| 189 | #endif /* CONFIG_HUGETLB_PAGE */ |
| 190 | |
Aneesh Kumar K.V | e34aa03 | 2015-12-01 09:06:53 +0530 | [diff] [blame] | 191 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
/*
 * Defined out of line. The callers in this header pass the bits to clear in
 * clr and 0 in set, and test the return value for bits that were set
 * beforehand (presumably it returns the previous PMD value -- confirm
 * against the definition).
 */
extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
					 unsigned long addr,
					 pmd_t *pmdp,
					 unsigned long clr,
					 unsigned long set);
| 197 | static inline char *get_hpte_slot_array(pmd_t *pmdp) |
| 198 | { |
| 199 | /* |
| 200 | * The hpte hindex is stored in the pgtable whose address is in the |
| 201 | * second half of the PMD |
| 202 | * |
| 203 | * Order this load with the test for pmd_trans_huge in the caller |
| 204 | */ |
| 205 | smp_rmb(); |
| 206 | return *(char **)(pmdp + PTRS_PER_PMD); |
| 207 | |
| 208 | |
| 209 | } |
/*
 * The linux hugepage PMD now includes the pmd entries followed by the
 * address of the stashed pgtable_t. The stashed pgtable_t contains the hpte
 * bits, one byte per HPTE entry, laid out as
 * [ 000 | 1 bit secondary | 3 bit hidx | 1 bit valid ].
 * With a 16MB hugepage and 64K HPTEs we need 256 entries, and with 4K HPTEs
 * we need 4096 entries. Both fit in a 4K pgtable_t.
 *
 * The top three bits are intentionally left as zero. These memory locations
 * are also used as normal page PTE pointers, so if any pointers are left
 * around while we collapse a hugepage, we need to make sure the
 * _PAGE_PRESENT bit of them is zero when we look at them.
 */

/* Return the valid bit (bit 0) of slot @index: 1 if valid, 0 otherwise */
static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
{
	const unsigned char slot = hpte_slot_array[index];

	return slot & 1u;
}
| 226 | |
/*
 * Return everything above the valid bit of slot @index, i.e. the stored
 * byte shifted right by one (secondary bit plus hidx).
 */
static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
					   int index)
{
	const unsigned int slot = hpte_slot_array[index];

	return slot >> 1;
}
| 232 | |
/*
 * Record @hidx in slot @index and mark the slot valid.
 *
 * @hpte_slot_array: per-hugepage byte array, one byte per HPTE
 * @index:           slot to update
 * @hidx:            4-bit value (1 bit secondary + 3 bit hidx)
 *
 * The stored byte is (hidx << 1) | 1. @hidx is masked to 4 bits so the top
 * three bits of the byte always stay zero, as the slot format requires;
 * values in the 0..15 range are stored exactly as before.
 */
static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
					unsigned int index, unsigned int hidx)
{
	hpte_slot_array[index] = (unsigned char)(((hidx & 0xf) << 1) | 0x1);
}
| 238 | |
| 239 | /* |
| 240 | * |
| 241 | * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs |
| 242 | * page. The hugetlbfs page table walking and mangling paths are totally |
| 243 | * separated form the core VM paths and they're differentiated by |
| 244 | * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run. |
| 245 | * |
| 246 | * pmd_trans_huge() is defined as false at build time if |
| 247 | * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build |
| 248 | * time in such case. |
| 249 | * |
| 250 | * For ppc64 we need to differntiate from explicit hugepages from THP, because |
| 251 | * for THP we also track the subpage details at the pmd level. We don't do |
| 252 | * that for explicit huge pages. |
| 253 | * |
| 254 | */ |
| 255 | static inline int pmd_trans_huge(pmd_t pmd) |
| 256 | { |
Aneesh Kumar K.V | 6a119ea | 2015-12-01 09:06:54 +0530 | [diff] [blame] | 257 | return !!((pmd_val(pmd) & (_PAGE_PTE | _PAGE_THP_HUGE)) == |
| 258 | (_PAGE_PTE | _PAGE_THP_HUGE)); |
Aneesh Kumar K.V | e34aa03 | 2015-12-01 09:06:53 +0530 | [diff] [blame] | 259 | } |
| 260 | |
Aneesh Kumar K.V | e34aa03 | 2015-12-01 09:06:53 +0530 | [diff] [blame] | 261 | static inline int pmd_large(pmd_t pmd) |
| 262 | { |
Aneesh Kumar K.V | 6a119ea | 2015-12-01 09:06:54 +0530 | [diff] [blame] | 263 | return !!(pmd_val(pmd) & _PAGE_PTE); |
Aneesh Kumar K.V | e34aa03 | 2015-12-01 09:06:53 +0530 | [diff] [blame] | 264 | } |
| 265 | |
| 266 | static inline pmd_t pmd_mknotpresent(pmd_t pmd) |
| 267 | { |
| 268 | return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT); |
| 269 | } |
| 270 | |
Aneesh Kumar K.V | e34aa03 | 2015-12-01 09:06:53 +0530 | [diff] [blame] | 271 | #define __HAVE_ARCH_PMD_SAME |
| 272 | static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) |
| 273 | { |
| 274 | return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0); |
| 275 | } |
| 276 | |
| 277 | static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, |
| 278 | unsigned long addr, pmd_t *pmdp) |
| 279 | { |
| 280 | unsigned long old; |
| 281 | |
| 282 | if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0) |
| 283 | return 0; |
| 284 | old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0); |
| 285 | return ((old & _PAGE_ACCESSED) != 0); |
| 286 | } |
| 287 | |
| 288 | #define __HAVE_ARCH_PMDP_SET_WRPROTECT |
| 289 | static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, |
| 290 | pmd_t *pmdp) |
| 291 | { |
| 292 | |
| 293 | if ((pmd_val(*pmdp) & _PAGE_RW) == 0) |
| 294 | return; |
| 295 | |
| 296 | pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0); |
| 297 | } |
| 298 | |
| 299 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
Paul Mackerras | fa28237 | 2008-01-24 08:35:13 +1100 | [diff] [blame] | 300 | #endif /* __ASSEMBLY__ */ |
Aneesh Kumar K.V | ab537dc | 2015-12-01 09:06:30 +0530 | [diff] [blame] | 301 | |
| 302 | #endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */ |