/*
 * Page table handling routines for radix page table.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/sched/mm.h>
#include <linux/memblock.h>
#include <linux/of_fdt.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/firmware.h>
#include <asm/powernv.h>
#include <asm/sections.h>

#include <trace/events/thp.h>

static int native_register_process_table(unsigned long base, unsigned long pg_sz,
					 unsigned long table_size)
{
	unsigned long patb1 = base | table_size | PATB_GR;

	partition_tb->patb1 = cpu_to_be64(patb1);
	return 0;
}
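
/*
 * Illustrative note on the value built above (bit positions are an
 * assumption; see the PATB_* definitions in asm/book3s/64/mmu.h): patb1
 * becomes the second doubleword of partition table entry 0, combining the
 * physical base of the process table, the encoded table size passed in by
 * the caller (radix_init_pgtable() passes PRTB_SIZE_SHIFT - 12), and the
 * PATB_GR flag.
 */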

static __ref void *early_alloc_pgtable(unsigned long size)
{
	void *pt;

	pt = __va(memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE));
	memset(pt, 0, size);

	return pt;
}

int radix__map_kernel_page(unsigned long ea, unsigned long pa,
			  pgprot_t flags,
			  unsigned int map_page_size)
{
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;
	/*
	 * Make sure task size is correct as per the max addr
	 */
	BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
	if (slab_is_available()) {
		pgdp = pgd_offset_k(ea);
		pudp = pud_alloc(&init_mm, pgdp, ea);
		if (!pudp)
			return -ENOMEM;
		if (map_page_size == PUD_SIZE) {
			ptep = (pte_t *)pudp;
			goto set_the_pte;
		}
		pmdp = pmd_alloc(&init_mm, pudp, ea);
		if (!pmdp)
			return -ENOMEM;
		if (map_page_size == PMD_SIZE) {
			ptep = pmdp_ptep(pmdp);
			goto set_the_pte;
		}
		ptep = pte_alloc_kernel(pmdp, ea);
		if (!ptep)
			return -ENOMEM;
	} else {
		pgdp = pgd_offset_k(ea);
		if (pgd_none(*pgdp)) {
			pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
			BUG_ON(pudp == NULL);
			pgd_populate(&init_mm, pgdp, pudp);
		}
		pudp = pud_offset(pgdp, ea);
		if (map_page_size == PUD_SIZE) {
			ptep = (pte_t *)pudp;
			goto set_the_pte;
		}
		if (pud_none(*pudp)) {
			pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
			BUG_ON(pmdp == NULL);
			pud_populate(&init_mm, pudp, pmdp);
		}
		pmdp = pmd_offset(pudp, ea);
		if (map_page_size == PMD_SIZE) {
			ptep = pmdp_ptep(pmdp);
			goto set_the_pte;
		}
		if (!pmd_present(*pmdp)) {
			ptep = early_alloc_pgtable(PAGE_SIZE);
			BUG_ON(ptep == NULL);
			pmd_populate_kernel(&init_mm, pmdp, ptep);
		}
		ptep = pte_offset_kernel(pmdp, ea);
	}

set_the_pte:
	set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
	smp_wmb();
	return 0;
}
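
/*
 * Example call (mirrors what create_physical_mapping() below does for the
 * linear mapping; shown here only to illustrate the interface):
 *
 *	rc = radix__map_kernel_page((unsigned long)__va(pa), pa,
 *				    PAGE_KERNEL, PMD_SIZE);
 *
 * i.e. map the 2M physical range starting at 'pa' at its linear-map
 * virtual address with kernel read/write permissions.
 */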

static inline void __meminit print_mapping(unsigned long start,
					   unsigned long end,
					   unsigned long size)
{
	if (end <= start)
		return;

	pr_info("Mapped range 0x%lx - 0x%lx with 0x%lx\n", start, end, size);
}

static int __meminit create_physical_mapping(unsigned long start,
					     unsigned long end)
{
	unsigned long vaddr, addr, mapping_size = 0;
	pgprot_t prot;

	start = _ALIGN_UP(start, PAGE_SIZE);
	for (addr = start; addr < end; addr += mapping_size) {
		unsigned long gap, previous_size;
		int rc;

		gap = end - addr;
		previous_size = mapping_size;

		if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
		    mmu_psize_defs[MMU_PAGE_1G].shift)
			mapping_size = PUD_SIZE;
		else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
			 mmu_psize_defs[MMU_PAGE_2M].shift)
			mapping_size = PMD_SIZE;
		else
			mapping_size = PAGE_SIZE;

		if (mapping_size != previous_size) {
			print_mapping(start, addr, previous_size);
			start = addr;
		}

		vaddr = (unsigned long)__va(addr);

		if (overlaps_kernel_text(vaddr, vaddr + mapping_size))
			prot = PAGE_KERNEL_X;
		else
			prot = PAGE_KERNEL;

		rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size);
		if (rc)
			return rc;
	}

	print_mapping(start, addr, mapping_size);
	return 0;
}
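
/*
 * Worked example of the size selection above (hypothetical range, for
 * illustration only): mapping 0x0 - 0x80200000 with both 1G and 2M page
 * sizes available uses two 1G mappings for 0x0 - 0x80000000, then falls
 * back to a single 2M mapping for the remaining 0x200000, because the gap
 * left is smaller than PUD_SIZE.
 */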

static void __init radix_init_pgtable(void)
{
	unsigned long rts_field;
	struct memblock_region *reg;

	/* We don't support SLB for radix */
	mmu_slb_size = 0;
	/*
	 * Create the linear mapping, using standard page size for now
	 */
	for_each_memblock(memory, reg)
		WARN_ON(create_physical_mapping(reg->base,
						reg->base + reg->size));
	/*
	 * Allocate Partition table and process table for the
	 * host.
	 */
	BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 36), "Process table size too large.");
	process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
	/*
	 * Fill in the process table.
	 */
	rts_field = radix__get_tree_size();
	process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);
	/*
	 * Fill in the partition table. We are supposed to use the effective
	 * address of the process table here, but our linear mapping also
	 * allows us to use the physical address.
	 */
	register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
	pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd);
	asm volatile("ptesync" : : : "memory");
	asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
		     "r" (TLBIEL_INVAL_SET_LPID), "r" (0));
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}

static void __init radix_init_partition_table(void)
{
	unsigned long rts_field, dw0;

	mmu_partition_table_init();
	rts_field = radix__get_tree_size();
	dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
	mmu_partition_table_set_entry(0, dw0, 0);

	pr_info("Initializing Radix MMU\n");
	pr_info("Partition table %p\n", partition_tb);
}

void __init radix_init_native(void)
{
	register_process_table = native_register_process_table;
}

static int __init get_idx_from_shift(unsigned int shift)
{
	int idx = -1;

	switch (shift) {
	case 0xc:
		idx = MMU_PAGE_4K;
		break;
	case 0x10:
		idx = MMU_PAGE_64K;
		break;
	case 0x15:
		idx = MMU_PAGE_2M;
		break;
	case 0x1e:
		idx = MMU_PAGE_1G;
		break;
	}
	return idx;
}

static int __init radix_dt_scan_page_sizes(unsigned long node,
					   const char *uname, int depth,
					   void *data)
{
	int size = 0;
	int shift, idx;
	unsigned int ap;
	const __be32 *prop;
	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);

	/* We are scanning "cpu" nodes only */
	if (type == NULL || strcmp(type, "cpu") != 0)
		return 0;

	prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
	if (!prop)
		return 0;

	pr_info("Page sizes from device-tree:\n");
	for (; size >= 4; size -= 4, ++prop) {

		struct mmu_psize_def *def;

		/* top 3 bits are the AP encoding */
		shift = be32_to_cpu(prop[0]) & ~(0xe << 28);
		ap = be32_to_cpu(prop[0]) >> 29;
		pr_info("Page size shift = %d AP=0x%x\n", shift, ap);

		idx = get_idx_from_shift(shift);
		if (idx < 0)
			continue;

		def = &mmu_psize_defs[idx];
		def->shift = shift;
		def->ap = ap;
	}

	/* needed ? */
	cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
	return 1;
}
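
/*
 * Worked example of the decode above (hypothetical device-tree cell, for
 * illustration only): a value of 0xa0000010 yields ap = 0x5 (top three
 * bits) and shift = 0x10, i.e. 64K pages - the same values used as the
 * fallback in radix__early_init_devtree() below.
 */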

void __init radix__early_init_devtree(void)
{
	int rc;

	/*
	 * Try to find the available page sizes in the device-tree
	 */
	rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL);
	if (rc != 0) /* Found */
		goto found;
	/*
	 * let's assume we have page 4k and 64k support
	 */
	mmu_psize_defs[MMU_PAGE_4K].shift = 12;
	mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;

	mmu_psize_defs[MMU_PAGE_64K].shift = 16;
	mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
found:
#ifdef CONFIG_SPARSEMEM_VMEMMAP
	if (mmu_psize_defs[MMU_PAGE_2M].shift) {
		/*
		 * map vmemmap using 2M if available
		 */
		mmu_vmemmap_psize = MMU_PAGE_2M;
	}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
	return;
}

static void update_hid_for_radix(void)
{
	unsigned long hid0;
	unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */

	asm volatile("ptesync": : :"memory");
	/* prs = 0, ric = 2, rs = 0, r = 1 is = 3 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(1), "i"(0), "i"(2), "r"(0) : "memory");
	/* prs = 1, ric = 2, rs = 0, r = 1 is = 3 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
	/*
	 * now switch the HID
	 */
	hid0 = mfspr(SPRN_HID0);
	hid0 |= HID0_POWER9_RADIX;
	mtspr(SPRN_HID0, hid0);
	asm volatile("isync": : :"memory");

	/* Wait for it to happen */
	while (!(mfspr(SPRN_HID0) & HID0_POWER9_RADIX))
		cpu_relax();
}

static void radix_init_amor(void)
{
	/*
	 * In HV mode, we init AMOR (Authority Mask Override Register) so that
	 * the hypervisor and guest can setup IAMR (Instruction Authority Mask
	 * Register), enable key 0 and set it to 1.
	 *
	 * AMOR = 0b1100 .... 0000 (Mask for key 0 is 11)
	 */
	mtspr(SPRN_AMOR, (3ul << 62));
}

static void radix_init_iamr(void)
{
	unsigned long iamr;

	/*
	 * The IAMR should be set to 0 on DD1.
	 */
	if (cpu_has_feature(CPU_FTR_POWER9_DD1))
		iamr = 0;
	else
		iamr = (1ul << 62);

	/*
	 * Radix always uses key0 of the IAMR to determine if an access is
	 * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
	 * fetch.
	 */
	mtspr(SPRN_IAMR, iamr);
}

void __init radix__early_init_mmu(void)
{
	unsigned long lpcr;

#ifdef CONFIG_PPC_64K_PAGES
	/* PAGE_SIZE mappings */
	mmu_virtual_psize = MMU_PAGE_64K;
#else
	mmu_virtual_psize = MMU_PAGE_4K;
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
	/* vmemmap mapping */
	mmu_vmemmap_psize = mmu_virtual_psize;
#endif
	/*
	 * initialize page table size
	 */
	__pte_index_size = RADIX_PTE_INDEX_SIZE;
	__pmd_index_size = RADIX_PMD_INDEX_SIZE;
	__pud_index_size = RADIX_PUD_INDEX_SIZE;
	__pgd_index_size = RADIX_PGD_INDEX_SIZE;
	__pmd_cache_index = RADIX_PMD_INDEX_SIZE;
	__pte_table_size = RADIX_PTE_TABLE_SIZE;
	__pmd_table_size = RADIX_PMD_TABLE_SIZE;
	__pud_table_size = RADIX_PUD_TABLE_SIZE;
	__pgd_table_size = RADIX_PGD_TABLE_SIZE;

	__pmd_val_bits = RADIX_PMD_VAL_BITS;
	__pud_val_bits = RADIX_PUD_VAL_BITS;
	__pgd_val_bits = RADIX_PGD_VAL_BITS;

	__kernel_virt_start = RADIX_KERN_VIRT_START;
	__kernel_virt_size = RADIX_KERN_VIRT_SIZE;
	__vmalloc_start = RADIX_VMALLOC_START;
	__vmalloc_end = RADIX_VMALLOC_END;
	vmemmap = (struct page *)RADIX_VMEMMAP_BASE;
	ioremap_bot = IOREMAP_BASE;

#ifdef CONFIG_PCI
	pci_io_base = ISA_IO_BASE;
#endif

	/*
	 * For now radix also uses the same frag size
	 */
	__pte_frag_nr = H_PTE_FRAG_NR;
	__pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;

	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		radix_init_native();
		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
			update_hid_for_radix();
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
		radix_init_partition_table();
		radix_init_amor();
	} else {
		radix_init_pseries();
	}

	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);

	radix_init_iamr();
	radix_init_pgtable();
}

void radix__early_init_mmu_secondary(void)
{
	unsigned long lpcr;
	/*
	 * update partition table control register and UPRT
	 */
	if (!firmware_has_feature(FW_FEATURE_LPAR)) {

		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
			update_hid_for_radix();

		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);

		mtspr(SPRN_PTCR,
		      __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
		radix_init_amor();
	}
	radix_init_iamr();
}

void radix__mmu_cleanup_all(void)
{
	unsigned long lpcr;

	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
		mtspr(SPRN_PTCR, 0);
		powernv_set_nmmu_ptcr(0);
		radix__flush_tlb_all();
	}
}

void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
				       phys_addr_t first_memblock_size)
{
	/* We don't currently support the first MEMBLOCK not mapping 0
	 * physical on those processors
	 */
	BUG_ON(first_memblock_base != 0);
	/*
	 * We limit the allocations that depend on ppc64_rma_size
	 * to first_memblock_size. We also clamp it to 1GB to
	 * avoid some funky things such as RTAS bugs.
	 *
	 * On a radix config we really don't have a limitation
	 * on real mode access. But keeping it as above works
	 * well enough.
	 */
	ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
	/*
	 * Finally limit subsequent allocations. We really don't want
	 * to limit the memblock allocations to rma_size. FIXME!! should
	 * we even limit at all ?
	 */
	memblock_set_current_limit(first_memblock_base + first_memblock_size);
}

#ifdef CONFIG_MEMORY_HOTPLUG
static void free_pte_table(pte_t *pte_start, pmd_t *pmd)
{
	pte_t *pte;
	int i;

	for (i = 0; i < PTRS_PER_PTE; i++) {
		pte = pte_start + i;
		if (!pte_none(*pte))
			return;
	}

	pte_free_kernel(&init_mm, pte_start);
	pmd_clear(pmd);
}

static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
{
	pmd_t *pmd;
	int i;

	for (i = 0; i < PTRS_PER_PMD; i++) {
		pmd = pmd_start + i;
		if (!pmd_none(*pmd))
			return;
	}

	pmd_free(&init_mm, pmd_start);
	pud_clear(pud);
}

static void remove_pte_table(pte_t *pte_start, unsigned long addr,
			     unsigned long end)
{
	unsigned long next;
	pte_t *pte;

	pte = pte_start + pte_index(addr);
	for (; addr < end; addr = next, pte++) {
		next = (addr + PAGE_SIZE) & PAGE_MASK;
		if (next > end)
			next = end;

		if (!pte_present(*pte))
			continue;

		if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) {
			/*
			 * The vmemmap_free() and remove_section_mapping()
			 * codepaths call us with aligned addresses.
			 */
			WARN_ONCE(1, "%s: unaligned range\n", __func__);
			continue;
		}

		pte_clear(&init_mm, addr, pte);
	}
}

static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
			     unsigned long end)
{
	unsigned long next;
	pte_t *pte_base;
	pmd_t *pmd;

	pmd = pmd_start + pmd_index(addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);

		if (!pmd_present(*pmd))
			continue;

		if (pmd_huge(*pmd)) {
			if (!IS_ALIGNED(addr, PMD_SIZE) ||
			    !IS_ALIGNED(next, PMD_SIZE)) {
				WARN_ONCE(1, "%s: unaligned range\n", __func__);
				continue;
			}

			pte_clear(&init_mm, addr, (pte_t *)pmd);
			continue;
		}

		pte_base = (pte_t *)pmd_page_vaddr(*pmd);
		remove_pte_table(pte_base, addr, next);
		free_pte_table(pte_base, pmd);
	}
}

static void remove_pud_table(pud_t *pud_start, unsigned long addr,
			     unsigned long end)
{
	unsigned long next;
	pmd_t *pmd_base;
	pud_t *pud;

	pud = pud_start + pud_index(addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);

		if (!pud_present(*pud))
			continue;

		if (pud_huge(*pud)) {
			if (!IS_ALIGNED(addr, PUD_SIZE) ||
			    !IS_ALIGNED(next, PUD_SIZE)) {
				WARN_ONCE(1, "%s: unaligned range\n", __func__);
				continue;
			}

			pte_clear(&init_mm, addr, (pte_t *)pud);
			continue;
		}

		pmd_base = (pmd_t *)pud_page_vaddr(*pud);
		remove_pmd_table(pmd_base, addr, next);
		free_pmd_table(pmd_base, pud);
	}
}

static void remove_pagetable(unsigned long start, unsigned long end)
{
	unsigned long addr, next;
	pud_t *pud_base;
	pgd_t *pgd;

	spin_lock(&init_mm.page_table_lock);

	for (addr = start; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);

		pgd = pgd_offset_k(addr);
		if (!pgd_present(*pgd))
			continue;

		if (pgd_huge(*pgd)) {
			if (!IS_ALIGNED(addr, PGDIR_SIZE) ||
			    !IS_ALIGNED(next, PGDIR_SIZE)) {
				WARN_ONCE(1, "%s: unaligned range\n", __func__);
				continue;
			}

			pte_clear(&init_mm, addr, (pte_t *)pgd);
			continue;
		}

		pud_base = (pud_t *)pgd_page_vaddr(*pgd);
		remove_pud_table(pud_base, addr, next);
	}

	spin_unlock(&init_mm.page_table_lock);
	radix__flush_tlb_kernel_range(start, end);
}
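
/*
 * Summary of the teardown above (no additional semantics): remove_pagetable()
 * walks the kernel page tables top-down, clearing huge entries in place and
 * recursing into lower levels otherwise; the free_p*_table() helpers then
 * release a table page only once every entry in it is none.  The TLB flush
 * for the whole range is issued once, after the page table lock is dropped.
 */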

int __ref radix__create_section_mapping(unsigned long start, unsigned long end)
{
	return create_physical_mapping(start, end);
}

int radix__remove_section_mapping(unsigned long start, unsigned long end)
{
	remove_pagetable(start, end);
	return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
int __meminit radix__vmemmap_create_mapping(unsigned long start,
				      unsigned long page_size,
				      unsigned long phys)
{
	/* Create a PTE encoding */
	unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;

	BUG_ON(radix__map_kernel_page(start, phys, __pgprot(flags), page_size));
	return 0;
}

#ifdef CONFIG_MEMORY_HOTPLUG
void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
{
	remove_pagetable(start, start + page_size);
}
#endif
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
				  pmd_t *pmdp, unsigned long clr,
				  unsigned long set)
{
	unsigned long old;

#ifdef CONFIG_DEBUG_VM
	WARN_ON(!radix__pmd_trans_huge(*pmdp));
	assert_spin_locked(&mm->page_table_lock);
#endif

	old = radix__pte_update(mm, addr, (pte_t *)pmdp, clr, set, 1);
	trace_hugepage_update(addr, old, clr, set);

	return old;
}

pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
			pmd_t *pmdp)
{
	pmd_t pmd;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	VM_BUG_ON(radix__pmd_trans_huge(*pmdp));
	/*
	 * khugepaged calls this for a normal (non-huge) pmd
	 */
	pmd = *pmdp;
	pmd_clear(pmdp);
	/*FIXME!! Verify whether we need this kick below */
	kick_all_cpus_sync();
	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
	return pmd;
}

/*
 * For us pgtable_t is pte_t *. In order to save the deposited
 * page table, we consider the allocated page table as a list
 * head. On withdraw we need to make sure we zero out the used
 * list_head memory area.
 */
void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				 pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	pte_t *ptep;
	pgtable_t pgtable;
	struct list_head *lh;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	*ptep = __pte(0);
	ptep++;
	*ptep = __pte(0);
	return pgtable;
}
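
/*
 * Note on the withdraw path above (restating the code, no new semantics):
 * because the deposited page table page itself doubles as the list_head
 * storage, the two __pte(0) stores scrub the list_head words before the
 * table is handed back for reuse as a real page table.
 */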

pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
				     unsigned long addr, pmd_t *pmdp)
{
	pmd_t old_pmd;
	unsigned long old;

	old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
	old_pmd = __pmd(old);
	/*
	 * Serialize against find_linux_pte_or_hugepte which does lock-less
	 * lookup in page tables with local interrupts disabled. For huge pages
	 * it casts pmd_t to pte_t. Since the format of pte_t is different from
	 * pmd_t we want to prevent transit from pmd pointing to page table
	 * to pmd pointing to huge page (and back) while interrupts are disabled.
	 * We clear pmd to possibly replace it with page table pointer in
	 * different code paths. So make sure we wait for the parallel
	 * find_linux_pte_or_hugepte to finish.
	 */
	kick_all_cpus_sync();
	return old_pmd;
}

int radix__has_transparent_hugepage(void)
{
	/* For radix 2M at PMD level means thp */
	if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT)
		return 1;
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */