#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <linux/cpu.h>

#undef pr_fmt
#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
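/*
 * With the pr_fmt above, the pr_info("enabled\n") / pr_info("disabled\n")
 * calls below are printed as, for example:
 *
 *        Kernel/User page tables isolation: enabled
 */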

#include <asm/kaiser.h>
#include <asm/tlbflush.h>        /* to verify its kaiser declarations */
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/desc.h>
#include <asm/cmdline.h>
#include <asm/vsyscall.h>
#include <asm/sections.h>

int kaiser_enabled __read_mostly = 1;
EXPORT_SYMBOL(kaiser_enabled);        /* for inlined TLB flush functions */

__visible
DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);

/*
 * This can have bit 63 set, so we cannot just use a plain "or"
 * instruction to get its value or'd into CR3.  It would take
 * another register.  So, we use a memory reference to it instead.
 *
 * This is also handy because systems that do not support PCIDs
 * just end up or'ing a 0 into their CR3, which does no harm.
 */
DEFINE_PER_CPU(unsigned long, x86_cr3_pcid_user);
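
/*
 * For illustration, the exit-to-user assembly consumes this value roughly
 * as follows (see the CR3-switching macros in the entry code for the real
 * sequence; the scratch register below is arbitrary):
 *
 *        movq        %cr3, %rdi
 *        orq         PER_CPU_VAR(x86_cr3_pcid_user), %rdi
 *        movq        %rdi, %cr3
 *
 * which ORs in the shadow-PGD offset and the PCID bits set up by
 * kaiser_setup_pcid() below.
 */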

/*
 * At runtime, the only things we map are some things for CPU
 * hotplug, and stacks for new processes.  No two CPUs will ever
 * be populating the same addresses, so we only need to ensure
 * that we protect between two CPUs trying to allocate and
 * populate the same page table page.
 *
 * Only take this lock when doing a set_p[4um]d(), but it is not
 * needed for doing a set_pte().  We assume that only the *owner*
 * of a given allocation will be doing this for _their_
 * allocation.
 *
 * This ensures that once a system has been running for a while
 * and there have been stacks all over and these page tables
 * are fully populated, there will be no further acquisitions of
 * this lock.
 */
static DEFINE_SPINLOCK(shadow_table_allocation_lock);
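
/*
 * The pattern this lock protects (see kaiser_pagetable_walk() below) is
 * the usual check / allocate / lock / re-check one, roughly:
 *
 *        if (pud_none(*pud)) {
 *                new_page = __get_free_page(gfp);
 *                spin_lock(&shadow_table_allocation_lock);
 *                if (pud_none(*pud))
 *                        set_pud(pud, __pud(prot | __pa(new_page)));
 *                else
 *                        free_page(new_page);
 *                spin_unlock(&shadow_table_allocation_lock);
 *        }
 */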

/*
 * Returns -1 on error.
 */
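/*
 * For example (with illustrative numbers): for a vaddr backed by a 4k
 * PTE, pfn 0x12345 and page offset 0x678 yield
 * (0x12345 << PAGE_SHIFT) | 0x678 == 0x12345678, i.e. the physical
 * address of vaddr.  2M and 1G mappings are handled the same way, using
 * the PMD/PUD page masks.
 */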
static inline unsigned long get_pa_from_mapping(unsigned long vaddr)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        pgd = pgd_offset_k(vaddr);
        /*
         * We made all the kernel PGDs present in kaiser_init().
         * We expect them to stay that way.
         */
        BUG_ON(pgd_none(*pgd));
        /*
         * PGD entries cover either 512GB or 128TB on all x86_64
         * configurations; we don't handle large (leaf) PGD entries.
         */
        BUG_ON(pgd_large(*pgd));

        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud)) {
                WARN_ON_ONCE(1);
                return -1;
        }

        if (pud_large(*pud))
                return (pud_pfn(*pud) << PAGE_SHIFT) | (vaddr & ~PUD_PAGE_MASK);

        pmd = pmd_offset(pud, vaddr);
        if (pmd_none(*pmd)) {
                WARN_ON_ONCE(1);
                return -1;
        }

        if (pmd_large(*pmd))
                return (pmd_pfn(*pmd) << PAGE_SHIFT) | (vaddr & ~PMD_PAGE_MASK);

        pte = pte_offset_kernel(pmd, vaddr);
        if (pte_none(*pte)) {
                WARN_ON_ONCE(1);
                return -1;
        }

        return (pte_pfn(*pte) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
}

/*
 * This is a relatively normal page table walk, except that it
 * also tries to allocate page table pages along the way.
 *
 * Returns a pointer to a PTE on success, or NULL on failure.
 */
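/*
 * A sketch of typical use, essentially what kaiser_add_user_map() below
 * does for each page it maps:
 *
 *        pte = kaiser_pagetable_walk(address, false);
 *        if (pte && pte_none(*pte))
 *                set_pte(pte, __pte(__PAGE_KERNEL | phys_addr));
 */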
static pte_t *kaiser_pagetable_walk(unsigned long address, bool user)
{
        pmd_t *pmd;
        pud_t *pud;
        pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
        gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
        unsigned long prot = _KERNPG_TABLE;

        if (pgd_none(*pgd)) {
                WARN_ONCE(1, "All shadow pgds should have been populated");
                return NULL;
        }
        BUILD_BUG_ON(pgd_large(*pgd) != 0);

        if (user) {
                /*
                 * The vsyscall page is the only page that will have
                 * _PAGE_USER set.  Catch everything else.
                 */
                BUG_ON(address != VSYSCALL_ADDR);

                set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
                prot = _PAGE_TABLE;
        }

        pud = pud_offset(pgd, address);
        /* The shadow page tables do not use large mappings: */
        if (pud_large(*pud)) {
                WARN_ON(1);
                return NULL;
        }
        if (pud_none(*pud)) {
                unsigned long new_pmd_page = __get_free_page(gfp);
                if (!new_pmd_page)
                        return NULL;
                spin_lock(&shadow_table_allocation_lock);
                if (pud_none(*pud)) {
                        set_pud(pud, __pud(prot | __pa(new_pmd_page)));
                        __inc_zone_page_state(virt_to_page((void *)
                                                new_pmd_page), NR_KAISERTABLE);
                } else
                        free_page(new_pmd_page);
                spin_unlock(&shadow_table_allocation_lock);
        }

        pmd = pmd_offset(pud, address);
        /* The shadow page tables do not use large mappings: */
        if (pmd_large(*pmd)) {
                WARN_ON(1);
                return NULL;
        }
        if (pmd_none(*pmd)) {
                unsigned long new_pte_page = __get_free_page(gfp);
                if (!new_pte_page)
                        return NULL;
                spin_lock(&shadow_table_allocation_lock);
                if (pmd_none(*pmd)) {
                        set_pmd(pmd, __pmd(prot | __pa(new_pte_page)));
                        __inc_zone_page_state(virt_to_page((void *)
                                                new_pte_page), NR_KAISERTABLE);
                } else
                        free_page(new_pte_page);
                spin_unlock(&shadow_table_allocation_lock);
        }

        return pte_offset_kernel(pmd, address);
}

static int kaiser_add_user_map(const void *__start_addr, unsigned long size,
                               unsigned long flags)
{
        int ret = 0;
        pte_t *pte;
        unsigned long start_addr = (unsigned long)__start_addr;
        unsigned long address = start_addr & PAGE_MASK;
        unsigned long end_addr = PAGE_ALIGN(start_addr + size);
        unsigned long target_address;

        /*
         * It is convenient for callers to pass in __PAGE_KERNEL etc,
         * and there is no actual harm from setting _PAGE_GLOBAL, so
         * long as CR4.PGE is not set.  But it is nonetheless troubling
         * to see Kaiser itself setting _PAGE_GLOBAL (now that "nokaiser"
         * support means it is no longer #defined to 0), so mask it off here.
         */
        flags &= ~_PAGE_GLOBAL;
        if (!(__supported_pte_mask & _PAGE_NX))
                flags &= ~_PAGE_NX;

        for (; address < end_addr; address += PAGE_SIZE) {
                target_address = get_pa_from_mapping(address);
                if (target_address == -1) {
                        ret = -EIO;
                        break;
                }
                pte = kaiser_pagetable_walk(address, flags & _PAGE_USER);
                if (!pte) {
                        ret = -ENOMEM;
                        break;
                }
                if (pte_none(*pte)) {
                        set_pte(pte, __pte(flags | target_address));
                } else {
                        pte_t tmp;
                        set_pte(&tmp, __pte(flags | target_address));
                        WARN_ON_ONCE(!pte_same(*pte, tmp));
                }
        }
        return ret;
}

static int kaiser_add_user_map_ptrs(const void *start, const void *end, unsigned long flags)
{
        unsigned long size = end - start;

        return kaiser_add_user_map(start, size, flags);
}

/*
 * Ensure that the top level of the (shadow) page tables is entirely
 * populated.  This ensures that all processes that get forked have the
 * same entries, so we never have to set up new entries in older
 * processes.
 *
 * Note: we never free these, so there are no updates to them after this.
 */
static void __init kaiser_init_all_pgds(void)
{
        pgd_t *pgd;
        int i = 0;

        pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long)0));
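        /*
         * Only the kernel half of the PGD (entries PTRS_PER_PGD/2 and up,
         * i.e. the upper half of the address space) is pre-populated here;
         * user-half entries are copied per-process by kaiser_set_shadow_pgd().
         */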
        for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
                pgd_t new_pgd;
                pud_t *pud = pud_alloc_one(&init_mm,
                                           PAGE_OFFSET + i * PGDIR_SIZE);
                if (!pud) {
                        WARN_ON(1);
                        break;
                }
                inc_zone_page_state(virt_to_page(pud), NR_KAISERTABLE);
                new_pgd = __pgd(_KERNPG_TABLE | __pa(pud));
                /*
                 * Make sure not to stomp on some other pgd entry.
                 */
                if (!pgd_none(pgd[i])) {
                        WARN_ON(1);
                        continue;
                }
                set_pgd(pgd + i, new_pgd);
        }
}

#define kaiser_add_user_map_early(start, size, flags) do {        \
        int __ret = kaiser_add_user_map(start, size, flags);        \
        WARN_ON(__ret);                                                \
} while (0)

#define kaiser_add_user_map_ptrs_early(start, end, flags) do {                \
        int __ret = kaiser_add_user_map_ptrs(start, end, flags);        \
        WARN_ON(__ret);                                                        \
} while (0)

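/*
 * Boot-time control, as parsed below: "pti=on" forces page table isolation
 * on; "pti=off", "nopti" or globally disabled mitigations force it off;
 * "pti=auto" (or no option at all) enables it except on AMD CPUs.  Under
 * Xen PV it is silently disabled.
 */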
void __init kaiser_check_boottime_disable(void)
{
        bool enable = true;
        char arg[5];
        int ret;

        if (boot_cpu_has(X86_FEATURE_XENPV))
                goto silent_disable;

        ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
        if (ret > 0) {
                if (!strncmp(arg, "on", 2))
                        goto enable;

                if (!strncmp(arg, "off", 3))
                        goto disable;

                if (!strncmp(arg, "auto", 4))
                        goto skip;
        }

        if (cmdline_find_option_bool(boot_command_line, "nopti") ||
            cpu_mitigations_off())
                goto disable;

skip:
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
                goto disable;

enable:
        if (enable)
                setup_force_cpu_cap(X86_FEATURE_KAISER);

        return;

disable:
        pr_info("disabled\n");

silent_disable:
        kaiser_enabled = 0;
        setup_clear_cpu_cap(X86_FEATURE_KAISER);
}

/*
 * If anything in here fails, we will likely die on one of the first
 * kernel->user transitions, taking init down with us.  But we will have
 * most of the kernel up by then and should be able to get a clean
 * warning out of it.  If we BUG_ON() here instead, we risk doing so
 * before we have good console output.
 */
void __init kaiser_init(void)
{
        int cpu;

        if (!kaiser_enabled)
                return;

        kaiser_init_all_pgds();

        /*
         * Note that this sets _PAGE_USER, and it needs to happen when the
         * pagetable hierarchy gets created, i.e. early.  Otherwise
         * kaiser_pagetable_walk() will encounter already-initialized PTEs
         * in the hierarchy and not set the proper permissions, leading to
         * page faults with page-protection violations when, for example,
         * trying to read the vsyscall page.
         */
        if (vsyscall_enabled())
                kaiser_add_user_map_early((void *)VSYSCALL_ADDR,
                                          PAGE_SIZE,
                                          vsyscall_pgprot);

        for_each_possible_cpu(cpu) {
                void *percpu_vaddr = __per_cpu_user_mapped_start +
                                     per_cpu_offset(cpu);
                unsigned long percpu_sz = __per_cpu_user_mapped_end -
                                          __per_cpu_user_mapped_start;
                kaiser_add_user_map_early(percpu_vaddr, percpu_sz,
                                          __PAGE_KERNEL);
        }

        /*
         * Map the entry/exit text section, which is needed when
         * switching between user and kernel mode.
         */
        kaiser_add_user_map_ptrs_early(__entry_text_start, __entry_text_end,
                                       __PAGE_KERNEL_RX);

#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
        kaiser_add_user_map_ptrs_early(__irqentry_text_start,
                                       __irqentry_text_end,
                                       __PAGE_KERNEL_RX);
#endif
        kaiser_add_user_map_early((void *)idt_descr.address,
                                  sizeof(gate_desc) * NR_VECTORS,
                                  __PAGE_KERNEL_RO);
#ifdef CONFIG_TRACING
        kaiser_add_user_map_early(&trace_idt_descr,
                                  sizeof(trace_idt_descr),
                                  __PAGE_KERNEL);
        kaiser_add_user_map_early(&trace_idt_table,
                                  sizeof(gate_desc) * NR_VECTORS,
                                  __PAGE_KERNEL);
#endif
        kaiser_add_user_map_early(&debug_idt_descr, sizeof(debug_idt_descr),
                                  __PAGE_KERNEL);
        kaiser_add_user_map_early(&debug_idt_table,
                                  sizeof(gate_desc) * NR_VECTORS,
                                  __PAGE_KERNEL);

        pr_info("enabled\n");
}

/* Add a mapping to the shadow (user) page tables, and keep the two copies in sync. */
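/*
 * For example, a hypothetical caller that wants some object reachable by
 * the entry code might do roughly:
 *
 *        ret = kaiser_add_mapping((unsigned long)obj, sizeof(*obj),
 *                                 __PAGE_KERNEL);
 *        ...
 *        kaiser_remove_mapping((unsigned long)obj, sizeof(*obj));
 */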
int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
{
        if (!kaiser_enabled)
                return 0;
        return kaiser_add_user_map((const void *)addr, size, flags);
}

void kaiser_remove_mapping(unsigned long start, unsigned long size)
{
        extern void unmap_pud_range_nofree(pgd_t *pgd,
                                unsigned long start, unsigned long end);
        unsigned long end = start + size;
        unsigned long addr, next;
        pgd_t *pgd;

        if (!kaiser_enabled)
                return;
        pgd = native_get_shadow_pgd(pgd_offset_k(start));
        for (addr = start; addr < end; pgd++, addr = next) {
                next = pgd_addr_end(addr, end);
                unmap_pud_range_nofree(pgd, addr, next);
        }
}

/*
 * Page table pages are page-aligned.  The lower half of the top
 * level is used for userspace and the top half for the kernel.
 * This returns true for user pages that need to get copied into
 * both the user and kernel copies of the page tables, and false
 * for kernel pages that should only be in the kernel copy.
 */
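/*
 * Concretely, with 512 8-byte pgd entries per page: entries 0-255 (byte
 * offsets 0-2047 within the PGD page) are the user half, and entries
 * 256-511 are the kernel half.
 */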
static inline bool is_userspace_pgd(pgd_t *pgdp)
{
        return ((unsigned long)pgdp % PAGE_SIZE) < (PAGE_SIZE / 2);
}

pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
{
        if (!kaiser_enabled)
                return pgd;
        /*
         * Do we need to also populate the shadow pgd?  Check _PAGE_USER to
         * skip cases like kexec and EFI which make temporary low mappings.
         */
        if (pgd.pgd & _PAGE_USER) {
                if (is_userspace_pgd(pgdp)) {
                        native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
                        /*
                         * Even if the entry is *mapping* userspace, ensure
                         * that userspace cannot use it.  This way, if we
                         * get out to userspace running on the kernel CR3,
                         * userspace will crash instead of running.
                         */
                        if (__supported_pte_mask & _PAGE_NX)
                                pgd.pgd |= _PAGE_NX;
                }
        } else if (!pgd.pgd) {
                /*
                 * pgd_clear() cannot check _PAGE_USER, and is even used to
                 * clear corrupted pgd entries: so just rely on cases like
                 * kexec and EFI never to be using pgd_clear().
                 */
                if (!WARN_ON_ONCE((unsigned long)pgdp & PAGE_SIZE) &&
                    is_userspace_pgd(pgdp))
                        native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
        }
        return pgd;
}

void kaiser_setup_pcid(void)
{
        unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET;

        if (this_cpu_has(X86_FEATURE_PCID))
                user_cr3 |= X86_CR3_PCID_USER_NOFLUSH;
        /*
         * This variable is used by the entry/exit code to switch
         * PCID and pgd, and to control TLB flushing.
         */
        this_cpu_write(x86_cr3_pcid_user, user_cr3);
}

/*
 * Make a note that this CPU will need to flush the USER TLB on return to
 * user.  If the CPU does not have PCID, the NOFLUSH bit will never have
 * been set.
 */
void kaiser_flush_tlb_on_return_to_user(void)
{
        if (this_cpu_has(X86_FEATURE_PCID))
                this_cpu_write(x86_cr3_pcid_user,
                        X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
}
EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);