/*
 * Xen mmu operations
 *
 * This file contains the various mmu fetch and update operations.
 * The most important job they must perform is the mapping between the
 * domain's pfn and the overall machine mfns.
 *
 * Xen allows guests to directly update the pagetable, in a controlled
 * fashion.  In other words, the guest modifies the same pagetable
 * that the CPU actually uses, which eliminates the overhead of having
 * a separate shadow pagetable.
 *
 * In order to allow this, it falls on the guest domain to map its
 * notion of a "physical" pfn - which is just a domain-local linear
 * address - into a real "machine address" which the CPU's MMU can
 * use.
 *
 * A pgd_t/pmd_t/pte_t will typically contain an mfn, and so can be
 * inserted directly into the pagetable.  When creating a new
 * pte/pmd/pgd, it converts the passed pfn into an mfn.  Conversely,
 * when reading the content back with __(pgd|pmd|pte)_val, it converts
 * the mfn back into a pfn.
 *
 * The other constraint is that all pages which make up a pagetable
 * must be mapped read-only in the guest.  This prevents uncontrolled
 * guest updates to the pagetable.  Xen strictly enforces this, and
 * will disallow any pagetable update which will end up mapping a
 * pagetable page RW, and will disallow using any writable page as a
 * pagetable.
 *
 * Naively, when loading %cr3 with the base of a new pagetable, Xen
 * would need to validate the whole pagetable before going on.
 * Naturally, this is quite slow.  The solution is to "pin" a
 * pagetable, which enforces all the constraints on the pagetable even
 * when it is not actively in use.  This means that Xen can be assured
 * that it is still valid when you do load it into %cr3, and doesn't
 * need to revalidate it.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
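
/*
 * A minimal sketch of the two translations described above, kept
 * disabled since it is illustrative only: pfn_to_mfn()/mfn_to_pfn()
 * are the real helpers from the Xen page headers, while the
 * example_* function is hypothetical and not part of this file.
 */
#if 0
static void example_translations(unsigned long pfn)
{
	unsigned long mfn = pfn_to_mfn(pfn);	/* guest pfn -> machine frame */

	BUG_ON(mfn_to_pfn(mfn) != pfn);		/* ... and back again */
}
#endif
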
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/bug.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/paravirt.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>

#include <xen/page.h>
#include <xen/interface/xen.h>

#include "multicalls.h"
#include "mmu.h"

#define P2M_ENTRIES_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long))
#define TOP_ENTRIES		(MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)

/* Placeholder for holes in the address space */
static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
	__attribute__((section(".data.page_aligned"))) =
		{ [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };

/* Array of pointers to pages containing p2m entries */
static unsigned long *p2m_top[TOP_ENTRIES]
	__attribute__((section(".data.page_aligned"))) =
		{ [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };

static inline unsigned p2m_top_index(unsigned long pfn)
{
	BUG_ON(pfn >= MAX_DOMAIN_PAGES);
	return pfn / P2M_ENTRIES_PER_PAGE;
}

static inline unsigned p2m_index(unsigned long pfn)
{
	return pfn % P2M_ENTRIES_PER_PAGE;
}
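
/*
 * Worked example of the index split, assuming 4K pages and 4-byte
 * longs so that P2M_ENTRIES_PER_PAGE == 1024 (values are illustrative
 * only):
 *
 *	pfn = 0x12345
 *	p2m_top_index(pfn) == 0x12345 / 1024 == 0x48
 *	p2m_index(pfn)     == 0x12345 % 1024 == 0x345
 *
 * so the mfn for that pfn lives at p2m_top[0x48][0x345].
 */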

void __init xen_build_dynamic_phys_to_machine(void)
{
	unsigned pfn;
	unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
	unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);

	for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
		unsigned topidx = p2m_top_index(pfn);

		p2m_top[topidx] = &mfn_list[pfn];
	}
}

unsigned long get_phys_to_machine(unsigned long pfn)
{
	unsigned topidx, idx;

	if (unlikely(pfn >= MAX_DOMAIN_PAGES))
		return INVALID_P2M_ENTRY;

	topidx = p2m_top_index(pfn);
	idx = p2m_index(pfn);
	return p2m_top[topidx][idx];
}
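
/*
 * A hedged usage sketch (disabled, illustrative only): how a caller
 * might translate a pfn before handing the frame to the hypervisor.
 * The example_* name is hypothetical; marking holes with
 * INVALID_P2M_ENTRY is the real convention used throughout this file.
 */
#if 0
static int example_translate(unsigned long pfn)
{
	unsigned long mfn = get_phys_to_machine(pfn);

	if (mfn == INVALID_P2M_ENTRY)
		return -ENOENT;	/* a hole: no machine frame backs this pfn */

	/* mfn can now go into a pagetable entry or a hypercall argument */
	return 0;
}
#endif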

static void alloc_p2m(unsigned long **pp)
{
	unsigned long *p;
	unsigned i;

	p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
	BUG_ON(p == NULL);

	for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
		p[i] = INVALID_P2M_ENTRY;

	if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
		free_page((unsigned long)p);
}

void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
	unsigned topidx, idx;

	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
		return;
	}

	if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
		BUG_ON(mfn != INVALID_P2M_ENTRY);
		return;
	}

	topidx = p2m_top_index(pfn);
	if (p2m_top[topidx] == p2m_missing) {
		/* no need to allocate a page to store an invalid entry */
		if (mfn == INVALID_P2M_ENTRY)
			return;
		alloc_p2m(&p2m_top[topidx]);
	}

	idx = p2m_index(pfn);
	p2m_top[topidx][idx] = mfn;
}
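
/*
 * Sketch of the update side, e.g. as a balloon-style caller might use
 * it (disabled, illustrative only; the example_* name is hypothetical).
 * Releasing a frame back to Xen leaves a hole in the p2m, recorded as
 * INVALID_P2M_ENTRY; set_phys_to_machine() above allocates a real p2m
 * page on demand only when a valid mfn needs storing.
 */
#if 0
static void example_release_frame(unsigned long pfn)
{
	/* ... after the machine frame has been returned to Xen ... */
	set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
#endif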

xmaddr_t arbitrary_virt_to_machine(unsigned long address)
{
	unsigned int level;
	pte_t *pte = lookup_address(address, &level);
	unsigned offset = address & ~PAGE_MASK;	/* offset within the page */

	BUG_ON(pte == NULL);

	return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset);
}
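
/*
 * In other words: maddr == (mfn << PAGE_SHIFT) + (address & ~PAGE_MASK).
 * Because the mfn is read from the live kernel pagetable entry, this
 * works for any mapped kernel virtual address, not just the linear map.
 */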

void make_lowmem_page_readonly(void *vaddr)
{
	pte_t *pte, ptev;
	unsigned long address = (unsigned long)vaddr;
	unsigned int level;

	pte = lookup_address(address, &level);
	BUG_ON(pte == NULL);

	ptev = pte_wrprotect(*pte);

	if (HYPERVISOR_update_va_mapping(address, ptev, 0))
		BUG();
}

void make_lowmem_page_readwrite(void *vaddr)
{
	pte_t *pte, ptev;
	unsigned long address = (unsigned long)vaddr;
	unsigned int level;

	pte = lookup_address(address, &level);
	BUG_ON(pte == NULL);

	ptev = pte_mkwrite(*pte);

	if (HYPERVISOR_update_va_mapping(address, ptev, 0))
		BUG();
}


void xen_set_pmd(pmd_t *ptr, pmd_t val)
{
	struct multicall_space mcs;
	struct mmu_update *u;

	preempt_disable();

	mcs = xen_mc_entry(sizeof(*u));
	u = mcs.args;
	u->ptr = virt_to_machine(ptr).maddr;
	u->val = pmd_val_ma(val);
	MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	preempt_enable();
}

/*
 * Associate a virtual page frame with a given physical page frame
 * and protection flags for that frame.
 */
void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = swapper_pg_dir + pgd_index(vaddr);
	if (pgd_none(*pgd)) {
		BUG();
		return;
	}
	pud = pud_offset(pgd, vaddr);
	if (pud_none(*pud)) {
		BUG();
		return;
	}
	pmd = pmd_offset(pud, vaddr);
	if (pmd_none(*pmd)) {
		BUG();
		return;
	}
	pte = pte_offset_kernel(pmd, vaddr);
	/* <mfn,flags> stored as-is, to permit clearing entries */
	xen_set_pte(pte, mfn_pte(mfn, flags));

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, pte_t pteval)
{
	/* updates to init_mm may be done without lock */
	if (mm == &init_mm)
		preempt_disable();

	if (mm == current->mm || mm == &init_mm) {
		if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
			struct multicall_space mcs;
			mcs = xen_mc_entry(0);

			MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
			xen_mc_issue(PARAVIRT_LAZY_MMU);
			goto out;
		} else
			if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
				goto out;
	}
	xen_set_pte(ptep, pteval);

out:
	if (mm == &init_mm)
		preempt_enable();
}

pteval_t xen_pte_val(pte_t pte)
{
	pteval_t ret = pte.pte;

	if (ret & _PAGE_PRESENT)
		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;

	return ret;
}

pgdval_t xen_pgd_val(pgd_t pgd)
{
	pgdval_t ret = pgd.pgd;
	if (ret & _PAGE_PRESENT)
		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
	return ret;
}

pte_t xen_make_pte(pteval_t pte)
{
	if (pte & _PAGE_PRESENT) {
		pte = phys_to_machine(XPADDR(pte)).maddr;
		pte &= ~(_PAGE_PCD | _PAGE_PWT);
	}

	return (pte_t){ .pte = pte };
}

pgd_t xen_make_pgd(pgdval_t pgd)
{
	if (pgd & _PAGE_PRESENT)
		pgd = phys_to_machine(XPADDR(pgd)).maddr;

	return (pgd_t){ pgd };
}

pmdval_t xen_pmd_val(pmd_t pmd)
{
	pmdval_t ret = native_pmd_val(pmd);
	if (ret & _PAGE_PRESENT)
		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
	return ret;
}
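
/*
 * A minimal sketch tying the conversions together (disabled,
 * illustrative only; the example_* name is hypothetical, and it
 * assumes the pfn is backed by a valid machine frame).  Building an
 * entry converts pfn -> mfn, reading it back converts mfn -> pfn, and
 * the flag bits ride along untouched in both directions.
 */
#if 0
static void example_round_trip(unsigned long pfn)
{
	pte_t pte = pfn_pte(pfn, PAGE_KERNEL);	/* stores an mfn */

	BUG_ON(!(pte_val(pte) & _PAGE_PRESENT));
	BUG_ON(pte_pfn(pte) != pfn);		/* reads back the pfn */
}
#endif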

void xen_set_pud(pud_t *ptr, pud_t val)
{
	struct multicall_space mcs;
	struct mmu_update *u;

	preempt_disable();

	mcs = xen_mc_entry(sizeof(*u));
	u = mcs.args;
	u->ptr = virt_to_machine(ptr).maddr;
	u->val = pud_val_ma(val);
	MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	preempt_enable();
}

void xen_set_pte(pte_t *ptep, pte_t pte)
{
	/* PAE ptes are two words; write the high word first, so the
	   entry never transiently pairs a present low word with the
	   wrong upper frame bits. */
	ptep->pte_high = pte.pte_high;
	smp_wmb();
	ptep->pte_low = pte.pte_low;
}

void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
{
	set_64bit((u64 *)ptep, pte_val_ma(pte));
}

void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	ptep->pte_low = 0;
	smp_wmb();		/* make sure low gets written first */
	ptep->pte_high = 0;
}

void xen_pmd_clear(pmd_t *pmdp)
{
	xen_set_pmd(pmdp, __pmd(0));
}

pmd_t xen_make_pmd(pmdval_t pmd)
{
	if (pmd & _PAGE_PRESENT)
		pmd = phys_to_machine(XPADDR(pmd)).maddr;

	return native_make_pmd(pmd);
}

/*
  (Yet another) pagetable walker.  This one is intended for pinning a
  pagetable.  This means that it walks a pagetable and calls the
  callback function on each page it finds making up the page table,
  at every level.  It walks the entire pagetable, but it only bothers
  pinning pte pages which are below pte_limit.  In the normal case
  this will be TASK_SIZE, but at boot we need to pin up to
  FIXADDR_TOP.  But the important bit is that we don't pin beyond
  there, because then we start getting into Xen's ptes.
*/
static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, enum pt_level),
		    unsigned long limit)
{
	pgd_t *pgd = pgd_base;
	int flush = 0;
	unsigned long addr = 0;
	unsigned long pgd_next;

	BUG_ON(limit > FIXADDR_TOP);

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

	for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) {
		pud_t *pud;
		unsigned long pud_limit, pud_next;

		pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP);

		if (!pgd_val(*pgd))
			continue;

		pud = pud_offset(pgd, 0);

		if (PTRS_PER_PUD > 1) /* not folded */
			flush |= (*func)(virt_to_page(pud), PT_PUD);

		for (; addr != pud_limit; pud++, addr = pud_next) {
			pmd_t *pmd;
			unsigned long pmd_limit;

			pud_next = pud_addr_end(addr, pud_limit);

			if (pud_next < limit)
				pmd_limit = pud_next;
			else
				pmd_limit = limit;

			if (pud_none(*pud))
				continue;

			pmd = pmd_offset(pud, 0);

			if (PTRS_PER_PMD > 1) /* not folded */
				flush |= (*func)(virt_to_page(pmd), PT_PMD);

			for (; addr != pmd_limit; pmd++) {
				addr += (PAGE_SIZE * PTRS_PER_PTE);
				if ((pmd_limit-1) < (addr-1)) {
					addr = pmd_limit;
					break;
				}

				if (pmd_none(*pmd))
					continue;

				flush |= (*func)(pmd_page(*pmd), PT_PTE);
			}
		}
	}

	flush |= (*func)(virt_to_page(pgd_base), PT_PGD);

	return flush;
}

static spinlock_t *lock_pte(struct page *page)
{
	spinlock_t *ptl = NULL;

#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
	ptl = __pte_lockptr(page);
	spin_lock(ptl);
#endif

	return ptl;
}

static void do_unlock(void *v)
{
	spinlock_t *ptl = v;
	spin_unlock(ptl);
}

static void xen_do_pin(unsigned level, unsigned long pfn)
{
	struct mmuext_op *op;
	struct multicall_space mcs;

	mcs = __xen_mc_entry(sizeof(*op));
	op = mcs.args;
	op->cmd = level;
	op->arg1.mfn = pfn_to_mfn(pfn);
	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
}

static int pin_page(struct page *page, enum pt_level level)
{
	unsigned pgfl = TestSetPagePinned(page);
	int flush;

	if (pgfl)
		flush = 0;		/* already pinned */
	else if (PageHighMem(page))
		/* kmaps need flushing if we found an unpinned
		   highpage */
		flush = 1;
	else {
		void *pt = lowmem_page_address(page);
		unsigned long pfn = page_to_pfn(page);
		struct multicall_space mcs = __xen_mc_entry(0);
		spinlock_t *ptl;

		flush = 0;

		ptl = NULL;
		if (level == PT_PTE)
			ptl = lock_pte(page);

		MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
					pfn_pte(pfn, PAGE_KERNEL_RO),
					level == PT_PGD ? UVMF_TLB_FLUSH : 0);

		if (level == PT_PTE)
			xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);

		if (ptl) {
			/* Queue a deferred unlock for when this batch
			   is completed. */
			xen_mc_callback(do_unlock, ptl);
		}
	}

	return flush;
}

/* This is called just after a mm has been created, but it has not
   been used yet.  We need to make sure that its pagetable is all
   read-only, and can be pinned. */
void xen_pgd_pin(pgd_t *pgd)
{
	xen_mc_batch();

	if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
		/* re-enable interrupts for kmap_flush_unused */
		xen_mc_issue(0);
		kmap_flush_unused();
		xen_mc_batch();
	}

	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
	xen_mc_issue(0);
}

/* The init_mm pagetable is really pinned as soon as it's created, but
   that's before we have page structures to store the bits.  So do all
   the book-keeping now. */
static __init int mark_pinned(struct page *page, enum pt_level level)
{
	SetPagePinned(page);
	return 0;
}

void __init xen_mark_init_mm_pinned(void)
{
	pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP);
}

static int unpin_page(struct page *page, enum pt_level level)
{
	unsigned pgfl = TestClearPagePinned(page);

	if (pgfl && !PageHighMem(page)) {
		void *pt = lowmem_page_address(page);
		unsigned long pfn = page_to_pfn(page);
		spinlock_t *ptl = NULL;
		struct multicall_space mcs;

		if (level == PT_PTE) {
			ptl = lock_pte(page);

			xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
		}

		mcs = __xen_mc_entry(0);

		MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
					pfn_pte(pfn, PAGE_KERNEL),
					level == PT_PGD ? UVMF_TLB_FLUSH : 0);

		if (ptl) {
			/* unlock when batch completed */
			xen_mc_callback(do_unlock, ptl);
		}
	}

	return 0;		/* never need to flush on unpin */
}

/* Release a pagetable's pages back as normal RW */
static void xen_pgd_unpin(pgd_t *pgd)
{
	xen_mc_batch();

	xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));

	pgd_walk(pgd, unpin_page, TASK_SIZE);

	xen_mc_issue(0);
}

void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
{
	spin_lock(&next->page_table_lock);
	xen_pgd_pin(next->pgd);
	spin_unlock(&next->page_table_lock);
}

void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
	spin_lock(&mm->page_table_lock);
	xen_pgd_pin(mm->pgd);
	spin_unlock(&mm->page_table_lock);
}


#ifdef CONFIG_SMP
/* Another cpu may still have its %cr3 pointing at the pagetable, so
   we need to repoint it somewhere else before we can unpin it. */
static void drop_other_mm_ref(void *info)
{
	struct mm_struct *mm = info;

	if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
		leave_mm(smp_processor_id());

	/* If this cpu still has a stale cr3 reference, then make sure
	   it has been flushed. */
	if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) {
		load_cr3(swapper_pg_dir);
		arch_flush_lazy_cpu_mode();
	}
}

static void drop_mm_ref(struct mm_struct *mm)
{
	cpumask_t mask;
	unsigned cpu;

	if (current->active_mm == mm) {
		if (current->mm == mm)
			load_cr3(swapper_pg_dir);
		else
			leave_mm(smp_processor_id());
		arch_flush_lazy_cpu_mode();
	}

	/* Get the "official" set of cpus referring to our pagetable. */
	mask = mm->cpu_vm_mask;

	/* It's possible that a vcpu may have a stale reference to our
	   cr3, because it's in lazy mode and hasn't yet flushed its
	   set of pending hypercalls.  In this case, we can look at
	   its actual current cr3 value, and force it to flush if
	   needed. */
	for_each_online_cpu(cpu) {
		if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
			cpu_set(cpu, mask);
	}

	if (!cpus_empty(mask))
		xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
}
#else
static void drop_mm_ref(struct mm_struct *mm)
{
	if (current->active_mm == mm)
		load_cr3(swapper_pg_dir);
}
#endif

/*
 * While a process runs, Xen pins its pagetables, which means that the
 * hypervisor forces it to be read-only, and it controls all updates
 * to it.  This means that all pagetable updates have to go via the
 * hypervisor, which is moderately expensive.
 *
 * Since we're pulling the pagetable down, we switch to use init_mm,
 * unpin the old process pagetable and mark it all read-write, which
 * allows further operations on it to be simple memory accesses.
 *
 * The only subtle point is that another CPU may be still using the
 * pagetable because of lazy tlb flushing.  This means we need to
 * switch all CPUs off this pagetable before we can unpin it.
 */
void xen_exit_mmap(struct mm_struct *mm)
{
	get_cpu();		/* make sure we don't move around */
	drop_mm_ref(mm);
	put_cpu();

	spin_lock(&mm->page_table_lock);

	/* pgd may not be pinned in the error exit path of execve */
	if (PagePinned(virt_to_page(mm->pgd)))
		xen_pgd_unpin(mm->pgd);

	spin_unlock(&mm->page_table_lock);
}