Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -07001/*
2 * Xen mmu operations
3 *
4 * This file contains the various mmu fetch and update operations.
5 * The most important job they must perform is the mapping between the
6 * domain's pfn and the overall machine mfns.
7 *
8 * Xen allows guests to directly update the pagetable, in a controlled
9 * fashion. In other words, the guest modifies the same pagetable
10 * that the CPU actually uses, which eliminates the overhead of having
11 * a separate shadow pagetable.
12 *
13 * In order to allow this, it falls on the guest domain to map its
14 * notion of a "physical" pfn - which is just a domain-local linear
15 * address - into a real "machine address" which the CPU's MMU can
16 * use.
17 *
18 * A pgd_t/pmd_t/pte_t will typically contain an mfn, and so can be
19 * inserted directly into the pagetable. When creating a new
20 * pte/pmd/pgd, it converts the passed pfn into an mfn. Conversely,
21 * when reading the content back with __(pgd|pmd|pte)_val, it converts
22 * the mfn back into a pfn.
23 *
24 * The other constraint is that all pages which make up a pagetable
25 * must be mapped read-only in the guest. This prevents uncontrolled
26 * guest updates to the pagetable. Xen strictly enforces this, and
27 * will disallow any pagetable update which will end up mapping a
28 * pagetable page RW, and will disallow using any writable page as a
29 * pagetable.
30 *
31 * Naively, when loading %cr3 with the base of a new pagetable, Xen
32 * would need to validate the whole pagetable before going on.
33 * Naturally, this is quite slow. The solution is to "pin" a
34 * pagetable, which enforces all the constraints on the pagetable even
35 * when it is not actively in use. This menas that Xen can be assured
36 * that it is still valid when you do load it into %cr3, and doesn't
37 * need to revalidate it.
38 *
39 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
40 */
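/*
 * Rough lifecycle sketch of the pinning described above (a summary, not
 * additional machinery): the pin/unpin work is driven from the mm hooks
 * defined later in this file, approximately as follows:
 *
 *	xen_activate_mm() / xen_dup_mmap()
 *		-> xen_pgd_pin(mm)	(mark every level RO, MMUEXT_PIN_*)
 *	xen_exit_mmap()
 *		-> xen_drop_mm_ref(mm)	(get every vcpu off this cr3)
 *		-> xen_pgd_unpin(mm)	(back to ordinary RW memory)
 */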
Jeremy Fitzhardingef120f132007-07-17 18:37:06 -070041#include <linux/sched.h>
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -070042#include <linux/highmem.h>
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -070043#include <linux/debugfs.h>
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -070044#include <linux/bug.h>
Jeremy Fitzhardinged2cb2142010-03-26 15:37:50 -070045#include <linux/vmalloc.h>
Randy Dunlap44408ad2009-05-12 13:31:40 -070046#include <linux/module.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090047#include <linux/gfp.h>
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -070048
49#include <asm/pgtable.h>
50#include <asm/tlbflush.h>
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -070051#include <asm/fixmap.h>
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -070052#include <asm/mmu_context.h>
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -080053#include <asm/setup.h>
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -070054#include <asm/paravirt.h>
Alex Nixon7347b402010-02-19 13:31:06 -050055#include <asm/e820.h>
Jeremy Fitzhardingecbcd79c2008-07-08 15:06:27 -070056#include <asm/linkage.h>
Alex Nixon08bbc9d2009-02-09 12:05:46 -080057#include <asm/page.h>
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -070058
59#include <asm/xen/hypercall.h>
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -070060#include <asm/xen/hypervisor.h>
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -070061
Jeremy Fitzhardingec0011db2010-02-04 14:46:34 -080062#include <xen/xen.h>
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -070063#include <xen/page.h>
64#include <xen/interface/xen.h>
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -080065#include <xen/interface/version.h>
Jeremy Fitzhardingec0011db2010-02-04 14:46:34 -080066#include <xen/interface/memory.h>
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -080067#include <xen/hvc-console.h>
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -070068
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -070069#include "multicalls.h"
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -070070#include "mmu.h"
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -070071#include "debugfs.h"
72
73#define MMU_UPDATE_HISTO 30
74
Alex Nixon19001c82009-02-09 12:05:46 -080075/*
76 * Protects atomic reservation decrease/increase against concurrent increases.
77 * Also protects non-atomic updates of current_pages and driver_pages, and
78 * balloon lists.
79 */
80DEFINE_SPINLOCK(xen_reservation_lock);
81
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -070082#ifdef CONFIG_XEN_DEBUG_FS
83
84static struct {
85 u32 pgd_update;
86 u32 pgd_update_pinned;
87 u32 pgd_update_batched;
88
89 u32 pud_update;
90 u32 pud_update_pinned;
91 u32 pud_update_batched;
92
93 u32 pmd_update;
94 u32 pmd_update_pinned;
95 u32 pmd_update_batched;
96
97 u32 pte_update;
98 u32 pte_update_pinned;
99 u32 pte_update_batched;
100
101 u32 mmu_update;
102 u32 mmu_update_extended;
103 u32 mmu_update_histo[MMU_UPDATE_HISTO];
104
105 u32 prot_commit;
106 u32 prot_commit_batched;
107
108 u32 set_pte_at;
109 u32 set_pte_at_batched;
110 u32 set_pte_at_pinned;
111 u32 set_pte_at_current;
112 u32 set_pte_at_kernel;
113} mmu_stats;
114
115static u8 zero_stats;
116
117static inline void check_zero(void)
118{
119 if (unlikely(zero_stats)) {
120 memset(&mmu_stats, 0, sizeof(mmu_stats));
121 zero_stats = 0;
122 }
123}
124
125#define ADD_STATS(elem, val) \
126 do { check_zero(); mmu_stats.elem += (val); } while(0)
127
128#else /* !CONFIG_XEN_DEBUG_FS */
129
130#define ADD_STATS(elem, val) do { (void)(val); } while(0)
131
132#endif /* CONFIG_XEN_DEBUG_FS */
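/*
 * Usage sketch for the stats machinery above: with CONFIG_XEN_DEBUG_FS
 * enabled, callers simply bump a counter, for example
 *
 *	ADD_STATS(pmd_update, 1);
 *	ADD_STATS(pmd_update_batched,
 *		  paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
 *
 * check_zero() lets the counters be reset lazily: once zero_stats is set
 * (presumably from a debugfs write, outside this excerpt), the next
 * ADD_STATS() call clears the whole mmu_stats structure first.
 */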
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700133
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -0800134
135/*
136 * Identity map, in addition to plain kernel map. This needs to be
137 * large enough to allocate the page table pages needed to map the rest.
138 * Each page can map 2MB.
139 */
140static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
141
142#ifdef CONFIG_X86_64
143/* l3 pud for userspace vsyscall mapping */
144static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
145#endif /* CONFIG_X86_64 */
146
147/*
148 * Note about cr3 (pagetable base) values:
149 *
150 * xen_cr3 contains the current logical cr3 value; it contains the
151 * last set cr3. This may not be the current effective cr3, because
152 * its update may still be lazily deferred. However, a vcpu looking
153 * at its own cr3 can use this value, knowing that everything will
154 * be self-consistent.
155 *
156 * xen_current_cr3 contains the actual vcpu cr3; it is set once the
157 * hypercall to set the vcpu cr3 is complete (so it may be a little
158 * out of date, but it will never be set early). If one vcpu is
159 * looking at another vcpu's cr3 value, it should use this variable.
160 */
161DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */
162DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
163
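/*
 * Sketch of how the rule above plays out in this file: a vcpu may check
 * its own effective cr3 locally, while a check of some other vcpu's cr3
 * goes through per_cpu(xen_current_cr3, cpu), as the mm-teardown code
 * below does:
 *
 *	if (percpu_read(xen_current_cr3) == __pa(mm->pgd))
 *		load_cr3(swapper_pg_dir);		// own vcpu
 *
 *	if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
 *		cpumask_set_cpu(cpu, mask);		// some other vcpu
 */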
164
Jeremy Fitzhardinged6182fb2008-07-08 15:07:13 -0700165/*
166 * Just beyond the highest usermode address. STACK_TOP_MAX has a
167 * redzone above it, so round it up to a PGD boundary.
168 */
169#define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
170
171
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100172#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
Jeremy Fitzhardingecf0923e2008-05-26 23:31:20 +0100173#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100174
Jeremy Fitzhardingecf0923e2008-05-26 23:31:20 +0100175/* Placeholder for holes in the address space */
Jeremy Fitzhardingecbcd79c2008-07-08 15:06:27 -0700176static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data =
Jeremy Fitzhardingecf0923e2008-05-26 23:31:20 +0100177 { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
178
179 /* Array of pointers to pages containing p2m entries */
Jeremy Fitzhardingecbcd79c2008-07-08 15:06:27 -0700180static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data =
Jeremy Fitzhardingecf0923e2008-05-26 23:31:20 +0100181 { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100182
Jeremy Fitzhardinged5edbc12008-05-26 23:31:22 +0100183/* Arrays of p2m arrays expressed in mfns used for save/restore */
Jeremy Fitzhardingecbcd79c2008-07-08 15:06:27 -0700184static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss;
Jeremy Fitzhardinged5edbc12008-05-26 23:31:22 +0100185
Jeremy Fitzhardingecbcd79c2008-07-08 15:06:27 -0700186static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
187 __page_aligned_bss;
Jeremy Fitzhardinged5edbc12008-05-26 23:31:22 +0100188
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100189static inline unsigned p2m_top_index(unsigned long pfn)
190{
Jeremy Fitzhardinge8006ec32008-05-26 23:31:19 +0100191 BUG_ON(pfn >= MAX_DOMAIN_PAGES);
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100192 return pfn / P2M_ENTRIES_PER_PAGE;
193}
194
195static inline unsigned p2m_index(unsigned long pfn)
196{
197 return pfn % P2M_ENTRIES_PER_PAGE;
198}
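/*
 * Worked example of the two-level p2m indexing, assuming 4K pages and
 * 8-byte unsigned longs (x86-64), so P2M_ENTRIES_PER_PAGE == 512.
 * Looking up pfn 1000000:
 *
 *	topidx = 1000000 / 512 = 1953;	// which p2m page
 *	idx    = 1000000 % 512 = 64;	// slot within that page
 *	mfn    = p2m_top[topidx][idx];
 *
 * which is what get_phys_to_machine() below does.
 */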
199
Jeremy Fitzhardinged5edbc12008-05-26 23:31:22 +0100200/* Build the parallel p2m_top_mfn structures */
Ian Campbellfa24ba62009-11-21 11:32:49 +0000201void xen_build_mfn_list_list(void)
Jeremy Fitzhardinged5edbc12008-05-26 23:31:22 +0100202{
203 unsigned pfn, idx;
204
Tejf63c2f22008-12-16 11:56:06 -0800205 for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
Jeremy Fitzhardinged5edbc12008-05-26 23:31:22 +0100206 unsigned topidx = p2m_top_index(pfn);
207
208 p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
209 }
210
Tejf63c2f22008-12-16 11:56:06 -0800211 for (idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
Jeremy Fitzhardinged5edbc12008-05-26 23:31:22 +0100212 unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
213 p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
214 }
Jeremy Fitzhardingecdaead62009-02-27 15:34:59 -0800215}
Jeremy Fitzhardinged5edbc12008-05-26 23:31:22 +0100216
Jeremy Fitzhardingecdaead62009-02-27 15:34:59 -0800217void xen_setup_mfn_list_list(void)
218{
Jeremy Fitzhardinged5edbc12008-05-26 23:31:22 +0100219 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
220
221 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
222 virt_to_mfn(p2m_top_mfn_list);
223 HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
224}
225
226/* Set up p2m_top to point to the domain-builder provided p2m pages */
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100227void __init xen_build_dynamic_phys_to_machine(void)
228{
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100229 unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
Jeremy Fitzhardinge8006ec32008-05-26 23:31:19 +0100230 unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
Jeremy Fitzhardinged5edbc12008-05-26 23:31:22 +0100231 unsigned pfn;
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100232
Tejf63c2f22008-12-16 11:56:06 -0800233 for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100234 unsigned topidx = p2m_top_index(pfn);
235
236 p2m_top[topidx] = &mfn_list[pfn];
237 }
Jeremy Fitzhardingecdaead62009-02-27 15:34:59 -0800238
239 xen_build_mfn_list_list();
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100240}
241
242unsigned long get_phys_to_machine(unsigned long pfn)
243{
244 unsigned topidx, idx;
245
Jeremy Fitzhardinge8006ec32008-05-26 23:31:19 +0100246 if (unlikely(pfn >= MAX_DOMAIN_PAGES))
247 return INVALID_P2M_ENTRY;
248
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100249 topidx = p2m_top_index(pfn);
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100250 idx = p2m_index(pfn);
251 return p2m_top[topidx][idx];
252}
Ingo Molnar15ce60052008-06-02 13:20:11 +0200253EXPORT_SYMBOL_GPL(get_phys_to_machine);
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100254
Jeremy Fitzhardingee791ca02009-02-26 15:48:33 -0800255/* install a new p2m_top page */
256bool install_p2mtop_page(unsigned long pfn, unsigned long *p)
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100257{
Jeremy Fitzhardingee791ca02009-02-26 15:48:33 -0800258 unsigned topidx = p2m_top_index(pfn);
259 unsigned long **pfnp, *mfnp;
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100260 unsigned i;
261
Jeremy Fitzhardingee791ca02009-02-26 15:48:33 -0800262 pfnp = &p2m_top[topidx];
263 mfnp = &p2m_top_mfn[topidx];
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100264
Tejf63c2f22008-12-16 11:56:06 -0800265 for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100266 p[i] = INVALID_P2M_ENTRY;
267
Jeremy Fitzhardingee791ca02009-02-26 15:48:33 -0800268 if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) {
Jeremy Fitzhardinged5edbc12008-05-26 23:31:22 +0100269 *mfnp = virt_to_mfn(p);
Jeremy Fitzhardingee791ca02009-02-26 15:48:33 -0800270 return true;
271 }
272
273 return false;
274}
275
276static void alloc_p2m(unsigned long pfn)
277{
278 unsigned long *p;
279
280 p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
281 BUG_ON(p == NULL);
282
283 if (!install_p2mtop_page(pfn, p))
284 free_page((unsigned long)p);
285}
286
287/* Try to install p2m mapping; fail if intermediate bits missing */
288bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
289{
290 unsigned topidx, idx;
291
292 if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
293 BUG_ON(mfn != INVALID_P2M_ENTRY);
294 return true;
295 }
296
297 topidx = p2m_top_index(pfn);
298 if (p2m_top[topidx] == p2m_missing) {
299 if (mfn == INVALID_P2M_ENTRY)
300 return true;
301 return false;
302 }
303
304 idx = p2m_index(pfn);
305 p2m_top[topidx][idx] = mfn;
306
307 return true;
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100308}
309
310void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
311{
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100312 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
313 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
314 return;
315 }
316
Jeremy Fitzhardingee791ca02009-02-26 15:48:33 -0800317 if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
318 alloc_p2m(pfn);
Jeremy Fitzhardinge8006ec32008-05-26 23:31:19 +0100319
Jeremy Fitzhardingee791ca02009-02-26 15:48:33 -0800320 if (!__set_phys_to_machine(pfn, mfn))
321 BUG();
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100322 }
Jeremy Fitzhardinged451bb72008-05-26 23:31:18 +0100323}
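/*
 * Usage sketch (the pfn value here is hypothetical): a caller translates
 * a pfn and later updates the mapping, relying on set_phys_to_machine()
 * transparently allocating a p2m page via alloc_p2m() when needed:
 *
 *	unsigned long pfn = 0x1234;			// hypothetical pfn
 *	unsigned long mfn = get_phys_to_machine(pfn);
 *
 *	if (mfn == INVALID_P2M_ENTRY)
 *		;	// no machine frame currently backs this pfn
 *
 *	set_phys_to_machine(pfn, INVALID_P2M_ENTRY);	// e.g. page handed back
 */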
324
Jeremy Fitzhardinge9976b392009-02-27 09:19:26 -0800325unsigned long arbitrary_virt_to_mfn(void *vaddr)
326{
327 xmaddr_t maddr = arbitrary_virt_to_machine(vaddr);
328
329 return PFN_DOWN(maddr.maddr);
330}
331
Jeremy Fitzhardingece803e72008-07-08 15:06:55 -0700332xmaddr_t arbitrary_virt_to_machine(void *vaddr)
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700333{
Jeremy Fitzhardingece803e72008-07-08 15:06:55 -0700334 unsigned long address = (unsigned long)vaddr;
Harvey Harrisonda7bfc52008-02-09 23:24:08 +0100335 unsigned int level;
Chris Lalancette9f32d212008-10-23 17:40:25 -0700336 pte_t *pte;
337 unsigned offset;
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700338
Chris Lalancette9f32d212008-10-23 17:40:25 -0700339 /*
340 * if the vaddr is in the linear-mapped range, we can just use
341 * the (quick) virt_to_machine() p2m lookup
342 */
343 if (virt_addr_valid(vaddr))
344 return virt_to_machine(vaddr);
345
346 /* otherwise we have to do a (slower) full page-table walk */
347
348 pte = lookup_address(address, &level);
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700349 BUG_ON(pte == NULL);
Chris Lalancette9f32d212008-10-23 17:40:25 -0700350 offset = address & ~PAGE_MASK;
Jeremy Fitzhardingeebd879e2008-07-08 15:06:54 -0700351 return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset);
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700352}
353
354void make_lowmem_page_readonly(void *vaddr)
355{
356 pte_t *pte, ptev;
357 unsigned long address = (unsigned long)vaddr;
Harvey Harrisonda7bfc52008-02-09 23:24:08 +0100358 unsigned int level;
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700359
Ingo Molnarf0646e42008-01-30 13:33:43 +0100360 pte = lookup_address(address, &level);
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700361 BUG_ON(pte == NULL);
362
363 ptev = pte_wrprotect(*pte);
364
365 if (HYPERVISOR_update_va_mapping(address, ptev, 0))
366 BUG();
367}
368
369void make_lowmem_page_readwrite(void *vaddr)
370{
371 pte_t *pte, ptev;
372 unsigned long address = (unsigned long)vaddr;
Harvey Harrisonda7bfc52008-02-09 23:24:08 +0100373 unsigned int level;
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700374
Ingo Molnarf0646e42008-01-30 13:33:43 +0100375 pte = lookup_address(address, &level);
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700376 BUG_ON(pte == NULL);
377
378 ptev = pte_mkwrite(*pte);
379
380 if (HYPERVISOR_update_va_mapping(address, ptev, 0))
381 BUG();
382}
383
384
Jeremy Fitzhardinge7708ad62008-08-19 13:34:22 -0700385static bool xen_page_pinned(void *ptr)
Jeremy Fitzhardingee2426cf2008-05-31 01:24:27 +0100386{
387 struct page *page = virt_to_page(ptr);
388
389 return PagePinned(page);
390}
391
Jeremy Fitzhardingec0011db2010-02-04 14:46:34 -0800392static bool xen_iomap_pte(pte_t pte)
393{
Alex Nixon7347b402010-02-19 13:31:06 -0500394 return pte_flags(pte) & _PAGE_IOMAP;
Jeremy Fitzhardingec0011db2010-02-04 14:46:34 -0800395}
396
397static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
398{
399 struct multicall_space mcs;
400 struct mmu_update *u;
401
402 mcs = xen_mc_entry(sizeof(*u));
403 u = mcs.args;
404
405 /* ptep might be kmapped when using 32-bit HIGHPTE */
406 u->ptr = arbitrary_virt_to_machine(ptep).maddr;
407 u->val = pte_val_ma(pteval);
408
409 MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_IO);
410
411 xen_mc_issue(PARAVIRT_LAZY_MMU);
412}
413
Jeremy Fitzhardinge7708ad62008-08-19 13:34:22 -0700414static void xen_extend_mmu_update(const struct mmu_update *update)
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700415{
Jeremy Fitzhardinged66bf8f2007-07-17 18:37:06 -0700416 struct multicall_space mcs;
417 struct mmu_update *u;
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700418
Jeremy Fitzhardinge400d3492008-06-16 04:30:03 -0700419 mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
420
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -0700421 if (mcs.mc != NULL) {
422 ADD_STATS(mmu_update_extended, 1);
423 ADD_STATS(mmu_update_histo[mcs.mc->args[1]], -1);
424
Jeremy Fitzhardinge400d3492008-06-16 04:30:03 -0700425 mcs.mc->args[1]++;
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -0700426
427 if (mcs.mc->args[1] < MMU_UPDATE_HISTO)
428 ADD_STATS(mmu_update_histo[mcs.mc->args[1]], 1);
429 else
430 ADD_STATS(mmu_update_histo[0], 1);
431 } else {
432 ADD_STATS(mmu_update, 1);
Jeremy Fitzhardinge400d3492008-06-16 04:30:03 -0700433 mcs = __xen_mc_entry(sizeof(*u));
434 MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -0700435 ADD_STATS(mmu_update_histo[1], 1);
Jeremy Fitzhardinge400d3492008-06-16 04:30:03 -0700436 }
437
438 u = mcs.args;
439 *u = *update;
440}
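/*
 * Sketch of the expected calling pattern: callers bracket one or more
 * updates with a multicall batch, as the helpers below do:
 *
 *	struct mmu_update u;
 *
 *	xen_mc_batch();
 *	u.ptr = arbitrary_virt_to_machine(ptr).maddr;
 *	u.val = pmd_val_ma(val);
 *	xen_extend_mmu_update(&u);
 *	xen_mc_issue(PARAVIRT_LAZY_MMU);
 *
 * If the previous multicall entry is already an mmu_update, the new
 * request is appended to it instead of consuming another multicall slot.
 */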
441
442void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
443{
444 struct mmu_update u;
445
Jeremy Fitzhardinged66bf8f2007-07-17 18:37:06 -0700446 preempt_disable();
447
Jeremy Fitzhardinge400d3492008-06-16 04:30:03 -0700448 xen_mc_batch();
449
Jeremy Fitzhardingece803e72008-07-08 15:06:55 -0700450 /* ptr may be ioremapped for 64-bit pagetable setup */
451 u.ptr = arbitrary_virt_to_machine(ptr).maddr;
Jeremy Fitzhardinge400d3492008-06-16 04:30:03 -0700452 u.val = pmd_val_ma(val);
Jeremy Fitzhardinge7708ad62008-08-19 13:34:22 -0700453 xen_extend_mmu_update(&u);
Jeremy Fitzhardinged66bf8f2007-07-17 18:37:06 -0700454
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -0700455 ADD_STATS(pmd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
456
Jeremy Fitzhardinged66bf8f2007-07-17 18:37:06 -0700457 xen_mc_issue(PARAVIRT_LAZY_MMU);
458
459 preempt_enable();
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700460}
461
Jeremy Fitzhardingee2426cf2008-05-31 01:24:27 +0100462void xen_set_pmd(pmd_t *ptr, pmd_t val)
463{
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -0700464 ADD_STATS(pmd_update, 1);
465
Jeremy Fitzhardingee2426cf2008-05-31 01:24:27 +0100466 /* If page is not pinned, we can just update the entry
467 directly */
Jeremy Fitzhardinge7708ad62008-08-19 13:34:22 -0700468 if (!xen_page_pinned(ptr)) {
Jeremy Fitzhardingee2426cf2008-05-31 01:24:27 +0100469 *ptr = val;
470 return;
471 }
472
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -0700473 ADD_STATS(pmd_update_pinned, 1);
474
Jeremy Fitzhardingee2426cf2008-05-31 01:24:27 +0100475 xen_set_pmd_hyper(ptr, val);
476}
477
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700478/*
479 * Associate a virtual page frame with a given machine page frame (mfn)
480 * and protection flags for that frame.
481 */
482void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
483{
Jeremy Fitzhardinge836fe2f2008-07-08 15:06:58 -0700484 set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700485}
486
487void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
488 pte_t *ptep, pte_t pteval)
489{
Jeremy Fitzhardingec0011db2010-02-04 14:46:34 -0800490 if (xen_iomap_pte(pteval)) {
491 xen_set_iomap_pte(ptep, pteval);
492 goto out;
493 }
494
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -0700495 ADD_STATS(set_pte_at, 1);
496// ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
497 ADD_STATS(set_pte_at_current, mm == current->mm);
498 ADD_STATS(set_pte_at_kernel, mm == &init_mm);
499
Jeremy Fitzhardinged66bf8f2007-07-17 18:37:06 -0700500 if (mm == current->mm || mm == &init_mm) {
Jeremy Fitzhardinge8965c1c2007-10-16 11:51:29 -0700501 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
Jeremy Fitzhardinged66bf8f2007-07-17 18:37:06 -0700502 struct multicall_space mcs;
503 mcs = xen_mc_entry(0);
504
505 MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -0700506 ADD_STATS(set_pte_at_batched, 1);
Jeremy Fitzhardinged66bf8f2007-07-17 18:37:06 -0700507 xen_mc_issue(PARAVIRT_LAZY_MMU);
Jeremy Fitzhardinge2bd50032008-04-02 10:54:10 -0700508 goto out;
Jeremy Fitzhardinged66bf8f2007-07-17 18:37:06 -0700509 } else
510 if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
Jeremy Fitzhardinge2bd50032008-04-02 10:54:10 -0700511 goto out;
Jeremy Fitzhardinged66bf8f2007-07-17 18:37:06 -0700512 }
513 xen_set_pte(ptep, pteval);
Jeremy Fitzhardinge2bd50032008-04-02 10:54:10 -0700514
Jeremy Fitzhardinge2829b442009-02-17 23:53:19 -0800515out: return;
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700516}
517
Tejf63c2f22008-12-16 11:56:06 -0800518pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
519 unsigned long addr, pte_t *ptep)
Jeremy Fitzhardingee57778a2008-06-16 04:30:02 -0700520{
521 /* Just return the pte as-is. We preserve the bits on commit */
522 return *ptep;
523}
524
525void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
526 pte_t *ptep, pte_t pte)
527{
Jeremy Fitzhardinge400d3492008-06-16 04:30:03 -0700528 struct mmu_update u;
Jeremy Fitzhardingee57778a2008-06-16 04:30:02 -0700529
Jeremy Fitzhardinge400d3492008-06-16 04:30:03 -0700530 xen_mc_batch();
531
Chris Lalancette9f32d212008-10-23 17:40:25 -0700532 u.ptr = arbitrary_virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
Jeremy Fitzhardinge400d3492008-06-16 04:30:03 -0700533 u.val = pte_val_ma(pte);
Jeremy Fitzhardinge7708ad62008-08-19 13:34:22 -0700534 xen_extend_mmu_update(&u);
Jeremy Fitzhardingee57778a2008-06-16 04:30:02 -0700535
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -0700536 ADD_STATS(prot_commit, 1);
537 ADD_STATS(prot_commit_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
538
Jeremy Fitzhardingee57778a2008-06-16 04:30:02 -0700539 xen_mc_issue(PARAVIRT_LAZY_MMU);
540}
541
Jeremy Fitzhardingeebb9cfe2008-06-16 15:01:56 -0700542/* Assume pteval_t is equivalent to all the other *val_t types. */
543static pteval_t pte_mfn_to_pfn(pteval_t val)
544{
545 if (val & _PAGE_PRESENT) {
Jeremy Fitzhardinge59438c92008-07-21 22:59:42 -0700546 unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
Jeremy Fitzhardinge77be1fa2008-07-21 22:59:56 -0700547 pteval_t flags = val & PTE_FLAGS_MASK;
Jeremy Fitzhardinged8355ac2008-07-03 22:10:18 -0700548 val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
Jeremy Fitzhardingeebb9cfe2008-06-16 15:01:56 -0700549 }
550
551 return val;
552}
553
554static pteval_t pte_pfn_to_mfn(pteval_t val)
555{
556 if (val & _PAGE_PRESENT) {
Jeremy Fitzhardinge59438c92008-07-21 22:59:42 -0700557 unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
Jeremy Fitzhardinge77be1fa2008-07-21 22:59:56 -0700558 pteval_t flags = val & PTE_FLAGS_MASK;
Jeremy Fitzhardinged8355ac2008-07-03 22:10:18 -0700559 val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
Jeremy Fitzhardingeebb9cfe2008-06-16 15:01:56 -0700560 }
561
562 return val;
563}
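/*
 * Worked example (the mfn value here is hypothetical): for a present,
 * writable pte covering pfn 0x100, and supposing the p2m says
 * pfn_to_mfn(0x100) == 0x8a3, pte_pfn_to_mfn() turns
 *
 *	val = (0x100UL << PAGE_SHIFT) | _PAGE_PRESENT | _PAGE_RW
 *
 * into
 *
 *	val = (0x8a3UL << PAGE_SHIFT) | _PAGE_PRESENT | _PAGE_RW
 *
 * i.e. only the frame-number bits change; the flag bits are preserved.
 * pte_mfn_to_pfn() is the inverse, and non-present entries pass through
 * unmodified.
 */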
564
Jeremy Fitzhardingec0011db2010-02-04 14:46:34 -0800565static pteval_t iomap_pte(pteval_t val)
566{
567 if (val & _PAGE_PRESENT) {
568 unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
569 pteval_t flags = val & PTE_FLAGS_MASK;
570
571 /* We assume the pte frame number is an MFN, so
572 just use it as-is. */
573 val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
574 }
575
576 return val;
577}
578
Jeremy Fitzhardinge947a69c2008-03-17 16:37:09 -0700579pteval_t xen_pte_val(pte_t pte)
580{
Jeremy Fitzhardingec0011db2010-02-04 14:46:34 -0800581 if (xen_initial_domain() && (pte.pte & _PAGE_IOMAP))
582 return pte.pte;
583
Jeremy Fitzhardingeebb9cfe2008-06-16 15:01:56 -0700584 return pte_mfn_to_pfn(pte.pte);
Jeremy Fitzhardinge947a69c2008-03-17 16:37:09 -0700585}
Jeremy Fitzhardingeda5de7c2009-01-28 14:35:07 -0800586PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
Jeremy Fitzhardinge947a69c2008-03-17 16:37:09 -0700587
588pgdval_t xen_pgd_val(pgd_t pgd)
589{
Jeremy Fitzhardingeebb9cfe2008-06-16 15:01:56 -0700590 return pte_mfn_to_pfn(pgd.pgd);
Jeremy Fitzhardinge947a69c2008-03-17 16:37:09 -0700591}
Jeremy Fitzhardingeda5de7c2009-01-28 14:35:07 -0800592PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
Jeremy Fitzhardinge947a69c2008-03-17 16:37:09 -0700593
594pte_t xen_make_pte(pteval_t pte)
595{
Alex Nixon7347b402010-02-19 13:31:06 -0500596 phys_addr_t addr = (pte & PTE_PFN_MASK);
597
598 /*
599 * Unprivileged domains are allowed to do IOMAPpings for
600 * PCI passthrough, but not map ISA space. The ISA
601 * mappings are just dummy local mappings to keep other
602 * parts of the kernel happy.
603 */
604 if (unlikely(pte & _PAGE_IOMAP) &&
605 (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
Jeremy Fitzhardingec0011db2010-02-04 14:46:34 -0800606 pte = iomap_pte(pte);
Alex Nixon7347b402010-02-19 13:31:06 -0500607 } else {
608 pte &= ~_PAGE_IOMAP;
Jeremy Fitzhardingec0011db2010-02-04 14:46:34 -0800609 pte = pte_pfn_to_mfn(pte);
Alex Nixon7347b402010-02-19 13:31:06 -0500610 }
Jeremy Fitzhardingec0011db2010-02-04 14:46:34 -0800611
Jeremy Fitzhardingeebb9cfe2008-06-16 15:01:56 -0700612 return native_make_pte(pte);
Jeremy Fitzhardinge947a69c2008-03-17 16:37:09 -0700613}
Jeremy Fitzhardingeda5de7c2009-01-28 14:35:07 -0800614PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
Jeremy Fitzhardinge947a69c2008-03-17 16:37:09 -0700615
616pgd_t xen_make_pgd(pgdval_t pgd)
617{
Jeremy Fitzhardingeebb9cfe2008-06-16 15:01:56 -0700618 pgd = pte_pfn_to_mfn(pgd);
619 return native_make_pgd(pgd);
Jeremy Fitzhardinge947a69c2008-03-17 16:37:09 -0700620}
Jeremy Fitzhardingeda5de7c2009-01-28 14:35:07 -0800621PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd);
Jeremy Fitzhardinge947a69c2008-03-17 16:37:09 -0700622
623pmdval_t xen_pmd_val(pmd_t pmd)
624{
Jeremy Fitzhardingeebb9cfe2008-06-16 15:01:56 -0700625 return pte_mfn_to_pfn(pmd.pmd);
Jeremy Fitzhardinge947a69c2008-03-17 16:37:09 -0700626}
Jeremy Fitzhardingeda5de7c2009-01-28 14:35:07 -0800627PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val);
Jeremy Fitzhardinge28499142008-05-09 12:05:57 +0100628
Jeremy Fitzhardingee2426cf2008-05-31 01:24:27 +0100629void xen_set_pud_hyper(pud_t *ptr, pud_t val)
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700630{
Jeremy Fitzhardinge400d3492008-06-16 04:30:03 -0700631 struct mmu_update u;
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700632
Jeremy Fitzhardinged66bf8f2007-07-17 18:37:06 -0700633 preempt_disable();
634
Jeremy Fitzhardinge400d3492008-06-16 04:30:03 -0700635 xen_mc_batch();
636
Jeremy Fitzhardingece803e72008-07-08 15:06:55 -0700637 /* ptr may be ioremapped for 64-bit pagetable setup */
638 u.ptr = arbitrary_virt_to_machine(ptr).maddr;
Jeremy Fitzhardinge400d3492008-06-16 04:30:03 -0700639 u.val = pud_val_ma(val);
Jeremy Fitzhardinge7708ad62008-08-19 13:34:22 -0700640 xen_extend_mmu_update(&u);
Jeremy Fitzhardinged66bf8f2007-07-17 18:37:06 -0700641
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -0700642 ADD_STATS(pud_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
643
Jeremy Fitzhardinged66bf8f2007-07-17 18:37:06 -0700644 xen_mc_issue(PARAVIRT_LAZY_MMU);
645
646 preempt_enable();
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700647}
648
Jeremy Fitzhardingee2426cf2008-05-31 01:24:27 +0100649void xen_set_pud(pud_t *ptr, pud_t val)
650{
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -0700651 ADD_STATS(pud_update, 1);
652
Jeremy Fitzhardingee2426cf2008-05-31 01:24:27 +0100653 /* If page is not pinned, we can just update the entry
654 directly */
Jeremy Fitzhardinge7708ad62008-08-19 13:34:22 -0700655 if (!xen_page_pinned(ptr)) {
Jeremy Fitzhardingee2426cf2008-05-31 01:24:27 +0100656 *ptr = val;
657 return;
658 }
659
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -0700660 ADD_STATS(pud_update_pinned, 1);
661
Jeremy Fitzhardingee2426cf2008-05-31 01:24:27 +0100662 xen_set_pud_hyper(ptr, val);
663}
664
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700665void xen_set_pte(pte_t *ptep, pte_t pte)
666{
Jeremy Fitzhardingec0011db2010-02-04 14:46:34 -0800667 if (xen_iomap_pte(pte)) {
668 xen_set_iomap_pte(ptep, pte);
669 return;
670 }
671
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -0700672 ADD_STATS(pte_update, 1);
673// ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
674 ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
675
Jeremy Fitzhardingef6e58732008-07-08 15:06:38 -0700676#ifdef CONFIG_X86_PAE
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700677 ptep->pte_high = pte.pte_high;
678 smp_wmb();
679 ptep->pte_low = pte.pte_low;
Jeremy Fitzhardingef6e58732008-07-08 15:06:38 -0700680#else
681 *ptep = pte;
682#endif
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700683}
684
Jeremy Fitzhardingef6e58732008-07-08 15:06:38 -0700685#ifdef CONFIG_X86_PAE
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700686void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
687{
Jeremy Fitzhardingec0011db2010-02-04 14:46:34 -0800688 if (xen_iomap_pte(pte)) {
689 xen_set_iomap_pte(ptep, pte);
690 return;
691 }
692
Jeremy Fitzhardingef6e58732008-07-08 15:06:38 -0700693 set_64bit((u64 *)ptep, native_pte_val(pte));
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700694}
695
696void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
697{
698 ptep->pte_low = 0;
699 smp_wmb(); /* make sure low gets written first */
700 ptep->pte_high = 0;
701}
702
703void xen_pmd_clear(pmd_t *pmdp)
704{
Jeremy Fitzhardingee2426cf2008-05-31 01:24:27 +0100705 set_pmd(pmdp, __pmd(0));
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700706}
Jeremy Fitzhardingef6e58732008-07-08 15:06:38 -0700707#endif /* CONFIG_X86_PAE */
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700708
Jeremy Fitzhardingeabf33032008-03-17 16:37:07 -0700709pmd_t xen_make_pmd(pmdval_t pmd)
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700710{
Jeremy Fitzhardingeebb9cfe2008-06-16 15:01:56 -0700711 pmd = pte_pfn_to_mfn(pmd);
Jeremy Fitzhardinge947a69c2008-03-17 16:37:09 -0700712 return native_make_pmd(pmd);
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700713}
Jeremy Fitzhardingeda5de7c2009-01-28 14:35:07 -0800714PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700715
Jeremy Fitzhardingef6e58732008-07-08 15:06:38 -0700716#if PAGETABLE_LEVELS == 4
717pudval_t xen_pud_val(pud_t pud)
718{
719 return pte_mfn_to_pfn(pud.pud);
720}
Jeremy Fitzhardingeda5de7c2009-01-28 14:35:07 -0800721PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val);
Jeremy Fitzhardingef6e58732008-07-08 15:06:38 -0700722
723pud_t xen_make_pud(pudval_t pud)
724{
725 pud = pte_pfn_to_mfn(pud);
726
727 return native_make_pud(pud);
728}
Jeremy Fitzhardingeda5de7c2009-01-28 14:35:07 -0800729PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud);
Jeremy Fitzhardingef6e58732008-07-08 15:06:38 -0700730
Jeremy Fitzhardinged6182fb2008-07-08 15:07:13 -0700731pgd_t *xen_get_user_pgd(pgd_t *pgd)
732{
733 pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
734 unsigned offset = pgd - pgd_page;
735 pgd_t *user_ptr = NULL;
736
737 if (offset < pgd_index(USER_LIMIT)) {
738 struct page *page = virt_to_page(pgd_page);
739 user_ptr = (pgd_t *)page->private;
740 if (user_ptr)
741 user_ptr += offset;
742 }
743
744 return user_ptr;
745}
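/*
 * Usage sketch: on 64-bit, the user-mode pagetable shadowing a kernel pgd
 * lives in a separate page whose address is stashed in page->private.
 * Callers translate a pointer to a kernel pgd entry into the matching
 * user slot, as xen_set_pgd() below does:
 *
 *	pgd_t *user_ptr = xen_get_user_pgd(ptr);
 *	if (user_ptr)
 *		__xen_set_pgd_hyper(user_ptr, val);
 */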
746
747static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
Jeremy Fitzhardingef6e58732008-07-08 15:06:38 -0700748{
749 struct mmu_update u;
750
Jeremy Fitzhardingef6e58732008-07-08 15:06:38 -0700751 u.ptr = virt_to_machine(ptr).maddr;
752 u.val = pgd_val_ma(val);
Jeremy Fitzhardinge7708ad62008-08-19 13:34:22 -0700753 xen_extend_mmu_update(&u);
Jeremy Fitzhardinged6182fb2008-07-08 15:07:13 -0700754}
755
756/*
757 * Raw hypercall-based set_pgd, intended for use in early boot before
758 * there's a page structure. This implies:
759 * 1. The only existing pagetable is the kernel's
760 * 2. It is always pinned
761 * 3. It has no user pagetable attached to it
762 */
763void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
764{
765 preempt_disable();
766
767 xen_mc_batch();
768
769 __xen_set_pgd_hyper(ptr, val);
Jeremy Fitzhardingef6e58732008-07-08 15:06:38 -0700770
771 xen_mc_issue(PARAVIRT_LAZY_MMU);
772
773 preempt_enable();
774}
775
776void xen_set_pgd(pgd_t *ptr, pgd_t val)
777{
Jeremy Fitzhardinged6182fb2008-07-08 15:07:13 -0700778 pgd_t *user_ptr = xen_get_user_pgd(ptr);
779
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -0700780 ADD_STATS(pgd_update, 1);
781
Jeremy Fitzhardingef6e58732008-07-08 15:06:38 -0700782 /* If page is not pinned, we can just update the entry
783 directly */
Jeremy Fitzhardinge7708ad62008-08-19 13:34:22 -0700784 if (!xen_page_pinned(ptr)) {
Jeremy Fitzhardingef6e58732008-07-08 15:06:38 -0700785 *ptr = val;
Jeremy Fitzhardinged6182fb2008-07-08 15:07:13 -0700786 if (user_ptr) {
Jeremy Fitzhardinge7708ad62008-08-19 13:34:22 -0700787 WARN_ON(xen_page_pinned(user_ptr));
Jeremy Fitzhardinged6182fb2008-07-08 15:07:13 -0700788 *user_ptr = val;
789 }
Jeremy Fitzhardingef6e58732008-07-08 15:06:38 -0700790 return;
791 }
792
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -0700793 ADD_STATS(pgd_update_pinned, 1);
794 ADD_STATS(pgd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
795
Jeremy Fitzhardinged6182fb2008-07-08 15:07:13 -0700796 /* If it's pinned, then we can at least batch the kernel and
797 user updates together. */
798 xen_mc_batch();
799
800 __xen_set_pgd_hyper(ptr, val);
801 if (user_ptr)
802 __xen_set_pgd_hyper(user_ptr, val);
803
804 xen_mc_issue(PARAVIRT_LAZY_MMU);
Jeremy Fitzhardingef6e58732008-07-08 15:06:38 -0700805}
806#endif /* PAGETABLE_LEVELS == 4 */
807
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700808/*
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -0700809 * (Yet another) pagetable walker. This one is intended for pinning a
810 * pagetable. This means that it walks a pagetable and calls the
811 * callback function on each page it finds making up the page table,
812 * at every level. It walks the entire pagetable, but it only bothers
813 * pinning pte pages which are below limit. In the normal case this
814 * will be STACK_TOP_MAX, but at boot we need to pin up to
815 * FIXADDR_TOP.
816 *
817 * For 32-bit the important bit is that we don't pin beyond there,
818 * because then we start getting into Xen's ptes.
819 *
820 * For 64-bit, we must skip the Xen hole in the middle of the address
821 * space, just after the big x86-64 virtual hole.
822 */
Ian Campbell86bbc2c2008-11-21 10:21:33 +0000823static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
824 int (*func)(struct mm_struct *mm, struct page *,
825 enum pt_level),
826 unsigned long limit)
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700827{
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700828 int flush = 0;
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -0700829 unsigned hole_low, hole_high;
830 unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
831 unsigned pgdidx, pudidx, pmdidx;
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700832
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -0700833 /* The limit is the last byte to be touched */
834 limit--;
835 BUG_ON(limit >= FIXADDR_TOP);
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700836
837 if (xen_feature(XENFEAT_auto_translated_physmap))
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700838 return 0;
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700839
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -0700840 /*
841 * 64-bit has a great big hole in the middle of the address
842 * space, which contains the Xen mappings. On 32-bit these
843 * will end up making a zero-sized hole, so this is a no-op.
844 */
Jeremy Fitzhardinged6182fb2008-07-08 15:07:13 -0700845 hole_low = pgd_index(USER_LIMIT);
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -0700846 hole_high = pgd_index(PAGE_OFFSET);
847
848 pgdidx_limit = pgd_index(limit);
849#if PTRS_PER_PUD > 1
850 pudidx_limit = pud_index(limit);
851#else
852 pudidx_limit = 0;
853#endif
854#if PTRS_PER_PMD > 1
855 pmdidx_limit = pmd_index(limit);
856#else
857 pmdidx_limit = 0;
858#endif
859
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -0700860 for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700861 pud_t *pud;
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700862
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -0700863 if (pgdidx >= hole_low && pgdidx < hole_high)
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700864 continue;
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700865
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -0700866 if (!pgd_val(pgd[pgdidx]))
867 continue;
868
869 pud = pud_offset(&pgd[pgdidx], 0);
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700870
871 if (PTRS_PER_PUD > 1) /* not folded */
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -0700872 flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700873
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -0700874 for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700875 pmd_t *pmd;
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700876
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -0700877 if (pgdidx == pgdidx_limit &&
878 pudidx > pudidx_limit)
879 goto out;
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700880
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -0700881 if (pud_none(pud[pudidx]))
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700882 continue;
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700883
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -0700884 pmd = pmd_offset(&pud[pudidx], 0);
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700885
886 if (PTRS_PER_PMD > 1) /* not folded */
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -0700887 flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700888
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -0700889 for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
890 struct page *pte;
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700891
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -0700892 if (pgdidx == pgdidx_limit &&
893 pudidx == pudidx_limit &&
894 pmdidx > pmdidx_limit)
895 goto out;
896
897 if (pmd_none(pmd[pmdidx]))
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700898 continue;
899
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -0700900 pte = pmd_page(pmd[pmdidx]);
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -0700901 flush |= (*func)(mm, pte, PT_PTE);
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700902 }
903 }
904 }
Jeremy Fitzhardinge11ad93e2008-08-19 13:32:51 -0700905
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -0700906out:
Jeremy Fitzhardinge11ad93e2008-08-19 13:32:51 -0700907 /* Do the top level last, so that the callbacks can use it as
908 a cue to do final things like tlb flushes. */
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -0700909 flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700910
911 return flush;
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700912}
913
Ian Campbell86bbc2c2008-11-21 10:21:33 +0000914static int xen_pgd_walk(struct mm_struct *mm,
915 int (*func)(struct mm_struct *mm, struct page *,
916 enum pt_level),
917 unsigned long limit)
918{
919 return __xen_pgd_walk(mm, mm->pgd, func, limit);
920}
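/*
 * Usage sketch: a caller supplies a per-page callback and a limit; the
 * walker ORs together the callbacks' return values as a "needs flush"
 * hint.  For instance, later in this file:
 *
 *	__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT);
 *	xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
 */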
921
Jeremy Fitzhardinge7708ad62008-08-19 13:34:22 -0700922/* If we're using split pte locks, then take the page's lock and
923 return a pointer to it. Otherwise return NULL. */
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -0700924static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -0700925{
926 spinlock_t *ptl = NULL;
927
Jeremy Fitzhardingef7d0b922008-09-09 15:43:22 -0700928#if USE_SPLIT_PTLOCKS
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -0700929 ptl = __pte_lockptr(page);
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -0700930 spin_lock_nest_lock(ptl, &mm->page_table_lock);
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -0700931#endif
932
933 return ptl;
934}
935
Jeremy Fitzhardinge7708ad62008-08-19 13:34:22 -0700936static void xen_pte_unlock(void *v)
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -0700937{
938 spinlock_t *ptl = v;
939 spin_unlock(ptl);
940}
941
942static void xen_do_pin(unsigned level, unsigned long pfn)
943{
944 struct mmuext_op *op;
945 struct multicall_space mcs;
946
947 mcs = __xen_mc_entry(sizeof(*op));
948 op = mcs.args;
949 op->cmd = level;
950 op->arg1.mfn = pfn_to_mfn(pfn);
951 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
952}
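/*
 * Usage sketch: the "level" argument is one of the MMUEXT_*_TABLE
 * commands, so pinning and unpinning look like:
 *
 *	xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);
 *	xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 *
 * both of which are queued on the current multicall batch rather than
 * issued immediately.
 */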
953
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -0700954static int xen_pin_page(struct mm_struct *mm, struct page *page,
955 enum pt_level level)
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700956{
Christoph Lameterd60cd462008-04-28 02:12:51 -0700957 unsigned pgfl = TestSetPagePinned(page);
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700958 int flush;
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -0700959
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700960 if (pgfl)
961 flush = 0; /* already pinned */
962 else if (PageHighMem(page))
963 /* kmaps need flushing if we found an unpinned
964 highpage */
965 flush = 1;
966 else {
967 void *pt = lowmem_page_address(page);
968 unsigned long pfn = page_to_pfn(page);
969 struct multicall_space mcs = __xen_mc_entry(0);
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -0700970 spinlock_t *ptl;
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700971
972 flush = 0;
973
Jeremy Fitzhardinge11ad93e2008-08-19 13:32:51 -0700974 /*
975 * We need to hold the pagetable lock between the time
976 * we make the pagetable RO and when we actually pin
977 * it. If we don't, then other users may come in and
978 * attempt to update the pagetable by writing it,
979 * which will fail because the memory is RO but not
980 * pinned, so Xen won't do the trap'n'emulate.
981 *
982 * If we're using split pte locks, we can't hold the
983 * entire pagetable's worth of locks during the
984 * traverse, because we may wrap the preempt count (8
985 * bits). The solution is to mark RO and pin each PTE
986 * page while holding the lock. This means the number
987 * of locks we end up holding is never more than a
988 * batch size (~32 entries, at present).
989 *
990 * If we're not using split pte locks, we needn't pin
991 * the PTE pages independently, because we're
992 * protected by the overall pagetable lock.
993 */
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -0700994 ptl = NULL;
995 if (level == PT_PTE)
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -0700996 ptl = xen_pte_lock(page, mm);
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -0700997
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -0700998 MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
999 pfn_pte(pfn, PAGE_KERNEL_RO),
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -07001000 level == PT_PGD ? UVMF_TLB_FLUSH : 0);
1001
Jeremy Fitzhardinge11ad93e2008-08-19 13:32:51 -07001002 if (ptl) {
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -07001003 xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);
1004
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -07001005 /* Queue a deferred unlock for when this batch
1006 is completed. */
Jeremy Fitzhardinge7708ad62008-08-19 13:34:22 -07001007 xen_mc_callback(xen_pte_unlock, ptl);
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -07001008 }
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001009 }
1010
1011 return flush;
1012}
1013
1014/* This is called just after a mm has been created, but it has not
1015 been used yet. We need to make sure that its pagetable is all
1016 read-only, and can be pinned. */
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -07001017static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -07001018{
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001019 xen_mc_batch();
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -07001020
Ian Campbell86bbc2c2008-11-21 10:21:33 +00001021 if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
Jeremy Fitzhardinged05fdf32008-10-28 19:23:06 +11001022 /* re-enable interrupts for flushing */
Jeremy Fitzhardingef87e4ca2007-07-17 18:37:06 -07001023 xen_mc_issue(0);
Jeremy Fitzhardinged05fdf32008-10-28 19:23:06 +11001024
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001025 kmap_flush_unused();
Jeremy Fitzhardinged05fdf32008-10-28 19:23:06 +11001026
Jeremy Fitzhardingef87e4ca2007-07-17 18:37:06 -07001027 xen_mc_batch();
1028 }
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001029
Jeremy Fitzhardinged6182fb2008-07-08 15:07:13 -07001030#ifdef CONFIG_X86_64
1031 {
1032 pgd_t *user_pgd = xen_get_user_pgd(pgd);
1033
1034 xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
1035
1036 if (user_pgd) {
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -07001037 xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
Tejf63c2f22008-12-16 11:56:06 -08001038 xen_do_pin(MMUEXT_PIN_L4_TABLE,
1039 PFN_DOWN(__pa(user_pgd)));
Jeremy Fitzhardinged6182fb2008-07-08 15:07:13 -07001040 }
1041 }
1042#else /* CONFIG_X86_32 */
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -07001043#ifdef CONFIG_X86_PAE
1044 /* Need to make sure unshared kernel PMD is pinnable */
Jeremy Fitzhardinge47cb2ed2008-11-06 13:48:24 -08001045 xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -07001046 PT_PMD);
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -07001047#endif
Jeremy Fitzhardinge28499142008-05-09 12:05:57 +01001048 xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
Jeremy Fitzhardinged6182fb2008-07-08 15:07:13 -07001049#endif /* CONFIG_X86_64 */
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001050 xen_mc_issue(0);
1051}
1052
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -07001053static void xen_pgd_pin(struct mm_struct *mm)
1054{
1055 __xen_pgd_pin(mm, mm->pgd);
1056}
1057
Jeremy Fitzhardinge0e913982008-05-26 23:31:27 +01001058/*
1059 * On save, we need to pin all pagetables to make sure they get their
1060 * mfns turned into pfns. Search the list for any unpinned pgds and pin
1061 * them (unpinned pgds are not currently in use, probably because the
1062 * process is under construction or destruction).
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -07001063 *
1064 * Expected to be called in stop_machine() ("equivalent to taking
1065 * every spinlock in the system"), so the locking doesn't really
1066 * matter all that much.
Jeremy Fitzhardinge0e913982008-05-26 23:31:27 +01001067 */
1068void xen_mm_pin_all(void)
1069{
1070 unsigned long flags;
1071 struct page *page;
1072
1073 spin_lock_irqsave(&pgd_lock, flags);
1074
1075 list_for_each_entry(page, &pgd_list, lru) {
1076 if (!PagePinned(page)) {
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -07001077 __xen_pgd_pin(&init_mm, (pgd_t *)page_address(page));
Jeremy Fitzhardinge0e913982008-05-26 23:31:27 +01001078 SetPageSavePinned(page);
1079 }
1080 }
1081
1082 spin_unlock_irqrestore(&pgd_lock, flags);
1083}
1084
Eduardo Habkostc1f2f092008-07-08 15:06:24 -07001085/*
1086 * The init_mm pagetable is really pinned as soon as it's created, but
1087 * that's before we have page structures to store the bits. So do all
1088 * the book-keeping now.
1089 */
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -07001090static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
1091 enum pt_level level)
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001092{
1093 SetPagePinned(page);
1094 return 0;
1095}
1096
Jeremy Fitzhardingeb96229b2009-03-17 13:30:55 -07001097static void __init xen_mark_init_mm_pinned(void)
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001098{
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -07001099 xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001100}
1101
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -07001102static int xen_unpin_page(struct mm_struct *mm, struct page *page,
1103 enum pt_level level)
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001104{
Christoph Lameterd60cd462008-04-28 02:12:51 -07001105 unsigned pgfl = TestClearPagePinned(page);
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001106
1107 if (pgfl && !PageHighMem(page)) {
1108 void *pt = lowmem_page_address(page);
1109 unsigned long pfn = page_to_pfn(page);
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -07001110 spinlock_t *ptl = NULL;
1111 struct multicall_space mcs;
1112
Jeremy Fitzhardinge11ad93e2008-08-19 13:32:51 -07001113 /*
1114 * Do the converse to pin_page. If we're using split
1115 * pte locks, we must be holding the lock while
1116 * the pte page is unpinned but still RO to prevent
1117 * concurrent updates from seeing it in this
1118 * partially-pinned state.
1119 */
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -07001120 if (level == PT_PTE) {
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -07001121 ptl = xen_pte_lock(page, mm);
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -07001122
Jeremy Fitzhardinge11ad93e2008-08-19 13:32:51 -07001123 if (ptl)
1124 xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -07001125 }
1126
1127 mcs = __xen_mc_entry(0);
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001128
1129 MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
1130 pfn_pte(pfn, PAGE_KERNEL),
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -07001131 level == PT_PGD ? UVMF_TLB_FLUSH : 0);
1132
1133 if (ptl) {
1134 /* unlock when batch completed */
Jeremy Fitzhardinge7708ad62008-08-19 13:34:22 -07001135 xen_mc_callback(xen_pte_unlock, ptl);
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -07001136 }
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001137 }
1138
1139 return 0; /* never need to flush on unpin */
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -07001140}
1141
1142/* Release a pagetable's pages back as normal RW */
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -07001143static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -07001144{
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001145 xen_mc_batch();
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -07001146
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -07001147 xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001148
Jeremy Fitzhardinged6182fb2008-07-08 15:07:13 -07001149#ifdef CONFIG_X86_64
1150 {
1151 pgd_t *user_pgd = xen_get_user_pgd(pgd);
1152
1153 if (user_pgd) {
Tejf63c2f22008-12-16 11:56:06 -08001154 xen_do_pin(MMUEXT_UNPIN_TABLE,
1155 PFN_DOWN(__pa(user_pgd)));
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -07001156 xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
Jeremy Fitzhardinged6182fb2008-07-08 15:07:13 -07001157 }
1158 }
1159#endif
1160
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -07001161#ifdef CONFIG_X86_PAE
1162 /* Need to make sure unshared kernel PMD is unpinned */
Jeremy Fitzhardinge47cb2ed2008-11-06 13:48:24 -08001163 xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -07001164 PT_PMD);
Jeremy Fitzhardinge5deb30d2008-07-08 15:07:06 -07001165#endif
Jeremy Fitzhardinged6182fb2008-07-08 15:07:13 -07001166
Ian Campbell86bbc2c2008-11-21 10:21:33 +00001167 __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001168
1169 xen_mc_issue(0);
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -07001170}
1171
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -07001172static void xen_pgd_unpin(struct mm_struct *mm)
1173{
1174 __xen_pgd_unpin(mm, mm->pgd);
1175}
1176
Jeremy Fitzhardinge0e913982008-05-26 23:31:27 +01001177/*
1178 * On resume, undo any pinning done at save, so that the rest of the
1179 * kernel doesn't see any unexpected pinned pagetables.
1180 */
1181void xen_mm_unpin_all(void)
1182{
1183 unsigned long flags;
1184 struct page *page;
1185
1186 spin_lock_irqsave(&pgd_lock, flags);
1187
1188 list_for_each_entry(page, &pgd_list, lru) {
1189 if (PageSavePinned(page)) {
1190 BUG_ON(!PagePinned(page));
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -07001191 __xen_pgd_unpin(&init_mm, (pgd_t *)page_address(page));
Jeremy Fitzhardinge0e913982008-05-26 23:31:27 +01001192 ClearPageSavePinned(page);
1193 }
1194 }
1195
1196 spin_unlock_irqrestore(&pgd_lock, flags);
1197}
1198
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -07001199void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
1200{
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001201 spin_lock(&next->page_table_lock);
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -07001202 xen_pgd_pin(next);
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001203 spin_unlock(&next->page_table_lock);
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -07001204}
1205
1206void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
1207{
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001208 spin_lock(&mm->page_table_lock);
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -07001209 xen_pgd_pin(mm);
Jeremy Fitzhardingef4f97b32007-07-17 18:37:05 -07001210 spin_unlock(&mm->page_table_lock);
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -07001211}
1212
Jeremy Fitzhardingef87e4ca2007-07-17 18:37:06 -07001213
1214#ifdef CONFIG_SMP
1215/* Another cpu may still have its %cr3 pointing at the pagetable, so
1216 we need to repoint it somewhere else before we can unpin it. */
1217static void drop_other_mm_ref(void *info)
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -07001218{
Jeremy Fitzhardingef87e4ca2007-07-17 18:37:06 -07001219 struct mm_struct *mm = info;
Jeremy Fitzhardingece87b3d2008-07-08 15:06:40 -07001220 struct mm_struct *active_mm;
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -07001221
Brian Gerst9eb912d2009-01-19 00:38:57 +09001222 active_mm = percpu_read(cpu_tlbstate.active_mm);
Jeremy Fitzhardingece87b3d2008-07-08 15:06:40 -07001223
1224 if (active_mm == mm)
Jeremy Fitzhardingef87e4ca2007-07-17 18:37:06 -07001225 leave_mm(smp_processor_id());
Jeremy Fitzhardinge9f799912007-10-16 11:51:30 -07001226
1227 /* If this cpu still has a stale cr3 reference, then make sure
1228 it has been flushed. */
Jeremy Fitzhardinge7fd7d832009-02-17 23:24:03 -08001229 if (percpu_read(xen_current_cr3) == __pa(mm->pgd))
Jeremy Fitzhardinge9f799912007-10-16 11:51:30 -07001230 load_cr3(swapper_pg_dir);
Jeremy Fitzhardingef87e4ca2007-07-17 18:37:06 -07001231}
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -07001232
Jeremy Fitzhardinge7708ad62008-08-19 13:34:22 -07001233static void xen_drop_mm_ref(struct mm_struct *mm)
Jeremy Fitzhardingef87e4ca2007-07-17 18:37:06 -07001234{
Mike Travise4d98202008-12-16 17:34:05 -08001235 cpumask_var_t mask;
Jeremy Fitzhardinge9f799912007-10-16 11:51:30 -07001236 unsigned cpu;
1237
Jeremy Fitzhardingef87e4ca2007-07-17 18:37:06 -07001238 if (current->active_mm == mm) {
1239 if (current->mm == mm)
1240 load_cr3(swapper_pg_dir);
1241 else
1242 leave_mm(smp_processor_id());
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -07001243 }
1244
Jeremy Fitzhardinge9f799912007-10-16 11:51:30 -07001245 /* Get the "official" set of cpus referring to our pagetable. */
Mike Travise4d98202008-12-16 17:34:05 -08001246 if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
1247 for_each_online_cpu(cpu) {
Rusty Russell78f1c4d2009-09-24 09:34:51 -06001248 if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
Mike Travise4d98202008-12-16 17:34:05 -08001249 && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
1250 continue;
1251 smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
1252 }
1253 return;
1254 }
Rusty Russell78f1c4d2009-09-24 09:34:51 -06001255 cpumask_copy(mask, mm_cpumask(mm));
Jeremy Fitzhardinge9f799912007-10-16 11:51:30 -07001256
1257 /* It's possible that a vcpu may have a stale reference to our
1258	   cr3, because it's in lazy mode, and it hasn't yet flushed
1259	   its set of pending hypercalls. In this case, we can
1260 look at its actual current cr3 value, and force it to flush
1261 if needed. */
1262 for_each_online_cpu(cpu) {
1263 if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
Mike Travise4d98202008-12-16 17:34:05 -08001264 cpumask_set_cpu(cpu, mask);
Jeremy Fitzhardinge9f799912007-10-16 11:51:30 -07001265 }
1266
Mike Travise4d98202008-12-16 17:34:05 -08001267 if (!cpumask_empty(mask))
1268 smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
1269 free_cpumask_var(mask);
Jeremy Fitzhardingef87e4ca2007-07-17 18:37:06 -07001270}
1271#else
Jeremy Fitzhardinge7708ad62008-08-19 13:34:22 -07001272static void xen_drop_mm_ref(struct mm_struct *mm)
Jeremy Fitzhardingef87e4ca2007-07-17 18:37:06 -07001273{
1274 if (current->active_mm == mm)
1275 load_cr3(swapper_pg_dir);
1276}
1277#endif
1278
1279/*
1280 * While a process runs, Xen pins its pagetables, which means that the
1281 * hypervisor forces it to be read-only, and it controls all updates
1282 * to it. This means that all pagetable updates have to go via the
1283 * hypervisor, which is moderately expensive.
1284 *
1285 * Since we're pulling the pagetable down, we switch to use init_mm,
1286 * unpin the old process pagetable and mark it all read-write, which
1287 * allows further operations on it to be simple memory accesses.
1288 *
1289 * The only subtle point is that another CPU may still be using the
1290 * pagetable because of lazy tlb flushing. This means we need to
1291 * switch all CPUs off this pagetable before we can unpin it.
1292 */
1293void xen_exit_mmap(struct mm_struct *mm)
1294{
1295 get_cpu(); /* make sure we don't move around */
Jeremy Fitzhardinge7708ad62008-08-19 13:34:22 -07001296 xen_drop_mm_ref(mm);
Jeremy Fitzhardingef87e4ca2007-07-17 18:37:06 -07001297 put_cpu();
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -07001298
Jeremy Fitzhardingef120f132007-07-17 18:37:06 -07001299 spin_lock(&mm->page_table_lock);
Jeremy Fitzhardingedf912ea2007-09-25 11:50:00 -07001300
1301 /* pgd may not be pinned in the error exit path of execve */
Jeremy Fitzhardinge7708ad62008-08-19 13:34:22 -07001302 if (xen_page_pinned(mm->pgd))
Jeremy Fitzhardingeeefb47f2008-10-08 13:01:39 -07001303 xen_pgd_unpin(mm);
Jeremy Fitzhardinge74260712007-10-16 11:51:30 -07001304
Jeremy Fitzhardingef120f132007-07-17 18:37:06 -07001305 spin_unlock(&mm->page_table_lock);
Jeremy Fitzhardinge3b827c12007-07-17 18:37:04 -07001306}
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -07001307
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001308static __init void xen_pagetable_setup_start(pgd_t *base)
1309{
1310}
1311
Thomas Gleixnerf1d70622009-08-20 13:13:52 +02001312static void xen_post_allocator_init(void);
1313
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001314static __init void xen_pagetable_setup_done(pgd_t *base)
1315{
1316 xen_setup_shared_info();
Thomas Gleixnerf1d70622009-08-20 13:13:52 +02001317 xen_post_allocator_init();
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001318}
1319
1320static void xen_write_cr2(unsigned long cr2)
1321{
1322 percpu_read(xen_vcpu)->arch.cr2 = cr2;
1323}
1324
1325static unsigned long xen_read_cr2(void)
1326{
1327 return percpu_read(xen_vcpu)->arch.cr2;
1328}
1329
1330unsigned long xen_read_cr2_direct(void)
1331{
1332 return percpu_read(xen_vcpu_info.arch.cr2);
1333}
1334
1335static void xen_flush_tlb(void)
1336{
1337 struct mmuext_op *op;
1338 struct multicall_space mcs;
1339
1340 preempt_disable();
1341
1342 mcs = xen_mc_entry(sizeof(*op));
1343
1344 op = mcs.args;
1345 op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
1346 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
1347
1348 xen_mc_issue(PARAVIRT_LAZY_MMU);
1349
1350 preempt_enable();
1351}
1352
1353static void xen_flush_tlb_single(unsigned long addr)
1354{
1355 struct mmuext_op *op;
1356 struct multicall_space mcs;
1357
1358 preempt_disable();
1359
1360 mcs = xen_mc_entry(sizeof(*op));
1361 op = mcs.args;
1362 op->cmd = MMUEXT_INVLPG_LOCAL;
1363 op->arg1.linear_addr = addr & PAGE_MASK;
1364 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
1365
1366 xen_mc_issue(PARAVIRT_LAZY_MMU);
1367
1368 preempt_enable();
1369}
1370
1371static void xen_flush_tlb_others(const struct cpumask *cpus,
1372 struct mm_struct *mm, unsigned long va)
1373{
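	/* Keep the cpu bitmap in the same multicall scratch space as
	   the op, so it stays valid until the batch is issued. */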
1374 struct {
1375 struct mmuext_op op;
1376 DECLARE_BITMAP(mask, NR_CPUS);
1377 } *args;
1378 struct multicall_space mcs;
1379
Jeremy Fitzhardingee3f8a742009-03-04 17:36:57 -08001380 if (cpumask_empty(cpus))
1381 return; /* nothing to do */
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001382
1383 mcs = xen_mc_entry(sizeof(*args));
1384 args = mcs.args;
1385 args->op.arg2.vcpumask = to_cpumask(args->mask);
1386
1387	/* Remove us, and any offline CPUs. */
1388 cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
1389 cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001390
1391 if (va == TLB_FLUSH_ALL) {
1392 args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
1393 } else {
1394 args->op.cmd = MMUEXT_INVLPG_MULTI;
1395 args->op.arg1.linear_addr = va;
1396 }
1397
1398 MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
1399
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001400 xen_mc_issue(PARAVIRT_LAZY_MMU);
1401}
1402
1403static unsigned long xen_read_cr3(void)
1404{
1405 return percpu_read(xen_cr3);
1406}
1407
1408static void set_current_cr3(void *v)
1409{
1410 percpu_write(xen_current_cr3, (unsigned long)v);
1411}
1412
1413static void __xen_write_cr3(bool kernel, unsigned long cr3)
1414{
1415 struct mmuext_op *op;
1416 struct multicall_space mcs;
1417 unsigned long mfn;
1418
1419 if (cr3)
1420 mfn = pfn_to_mfn(PFN_DOWN(cr3));
1421 else
1422 mfn = 0;
1423
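	/* An mfn of zero is only valid for the user baseptr (meaning no
	   user pagetable); the kernel baseptr must never be zero. */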
1424 WARN_ON(mfn == 0 && kernel);
1425
1426 mcs = __xen_mc_entry(sizeof(*op));
1427
1428 op = mcs.args;
1429 op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
1430 op->arg1.mfn = mfn;
1431
1432 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
1433
1434 if (kernel) {
1435 percpu_write(xen_cr3, cr3);
1436
1437 /* Update xen_current_cr3 once the batch has actually
1438 been submitted. */
1439 xen_mc_callback(set_current_cr3, (void *)cr3);
1440 }
1441}
1442
1443static void xen_write_cr3(unsigned long cr3)
1444{
1445 BUG_ON(preemptible());
1446
1447 xen_mc_batch(); /* disables interrupts */
1448
1449	/* Update while interrupts are disabled, so it's atomic with
1450	   respect to IPIs */
1451 percpu_write(xen_cr3, cr3);
1452
1453 __xen_write_cr3(true, cr3);
1454
1455#ifdef CONFIG_X86_64
1456 {
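		/* 64-bit PV guests have a separate user pagetable;
		   keep Xen's user baseptr in sync with it, or clear it
		   if this pagetable has no user part. */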
1457 pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
1458 if (user_pgd)
1459 __xen_write_cr3(false, __pa(user_pgd));
1460 else
1461 __xen_write_cr3(false, 0);
1462 }
1463#endif
1464
1465 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
1466}
1467
1468static int xen_pgd_alloc(struct mm_struct *mm)
1469{
1470 pgd_t *pgd = mm->pgd;
1471 int ret = 0;
1472
1473 BUG_ON(PagePinned(virt_to_page(pgd)));
1474
1475#ifdef CONFIG_X86_64
1476 {
1477 struct page *page = virt_to_page(pgd);
1478 pgd_t *user_pgd;
1479
1480 BUG_ON(page->private != 0);
1481
1482 ret = -ENOMEM;
1483
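		/* Allocate a shadow pgd for the user half of the
		   address space and stash it in page->private, where
		   xen_get_user_pgd() will look for it. */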
1484 user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
1485 page->private = (unsigned long)user_pgd;
1486
1487 if (user_pgd != NULL) {
1488 user_pgd[pgd_index(VSYSCALL_START)] =
1489 __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
1490 ret = 0;
1491 }
1492
1493 BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
1494 }
1495#endif
1496
1497 return ret;
1498}
1499
1500static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
1501{
1502#ifdef CONFIG_X86_64
1503 pgd_t *user_pgd = xen_get_user_pgd(pgd);
1504
1505 if (user_pgd)
1506 free_page((unsigned long)user_pgd);
1507#endif
1508}
1509
Jeremy Fitzhardinge1f4f9312009-02-02 13:58:06 -08001510#ifdef CONFIG_X86_32
1511static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
1512{
1513 /* If there's an existing pte, then don't allow _PAGE_RW to be set */
1514 if (pte_val_ma(*ptep) & _PAGE_PRESENT)
1515 pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
1516 pte_val_ma(pte));
1517
1518 return pte;
1519}
1520
1521/* Init-time set_pte while constructing initial pagetables, which
1522 doesn't allow RO pagetable pages to be remapped RW */
1523static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
1524{
1525 pte = mask_rw_pte(ptep, pte);
1526
1527 xen_set_pte(ptep, pte);
1528}
1529#endif
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001530
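/* Issue a single synchronous mmuext op to pin or unpin the pagetable
   page at pfn. */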
Jeremy Fitzhardingeb96229b2009-03-17 13:30:55 -07001531static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
1532{
1533 struct mmuext_op op;
1534 op.cmd = cmd;
1535 op.arg1.mfn = pfn_to_mfn(pfn);
1536 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
1537 BUG();
1538}
1539
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001540/* Early in boot, while setting up the initial pagetable, assume
1541 everything is pinned. */
1542static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
1543{
1544#ifdef CONFIG_FLATMEM
1545 BUG_ON(mem_map); /* should only be used early */
1546#endif
1547 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
Jeremy Fitzhardingeb96229b2009-03-17 13:30:55 -07001548 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
1549}
1550
1551/* Used for pmd and pud */
1552static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
1553{
1554#ifdef CONFIG_FLATMEM
1555 BUG_ON(mem_map); /* should only be used early */
1556#endif
1557 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001558}
1559
1560/* Early release_pte assumes that all pts are pinned, since there's
1561 only init_mm and anything attached to that is pinned. */
Jeremy Fitzhardingeb96229b2009-03-17 13:30:55 -07001562static __init void xen_release_pte_init(unsigned long pfn)
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001563{
Jeremy Fitzhardingeb96229b2009-03-17 13:30:55 -07001564 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001565 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1566}
1567
Jeremy Fitzhardingeb96229b2009-03-17 13:30:55 -07001568static __init void xen_release_pmd_init(unsigned long pfn)
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001569{
Jeremy Fitzhardingeb96229b2009-03-17 13:30:55 -07001570 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001571}
1572
1573/* This needs to make sure the new pte page is pinned iff it's being
1574 attached to a pinned pagetable. */
1575static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level)
1576{
1577 struct page *page = pfn_to_page(pfn);
1578
1579 if (PagePinned(virt_to_page(mm->pgd))) {
1580 SetPagePinned(page);
1581
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001582 if (!PageHighMem(page)) {
1583 make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
1584 if (level == PT_PTE && USE_SPLIT_PTLOCKS)
1585 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
1586 } else {
1587 /* make sure there are no stray mappings of
1588 this page */
1589 kmap_flush_unused();
1590 }
1591 }
1592}
1593
1594static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
1595{
1596 xen_alloc_ptpage(mm, pfn, PT_PTE);
1597}
1598
1599static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
1600{
1601 xen_alloc_ptpage(mm, pfn, PT_PMD);
1602}
1603
1604/* This should never happen until we're OK to use struct page */
1605static void xen_release_ptpage(unsigned long pfn, unsigned level)
1606{
1607 struct page *page = pfn_to_page(pfn);
1608
1609 if (PagePinned(page)) {
1610 if (!PageHighMem(page)) {
1611 if (level == PT_PTE && USE_SPLIT_PTLOCKS)
1612 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
1613 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1614 }
1615 ClearPagePinned(page);
1616 }
1617}
1618
1619static void xen_release_pte(unsigned long pfn)
1620{
1621 xen_release_ptpage(pfn, PT_PTE);
1622}
1623
1624static void xen_release_pmd(unsigned long pfn)
1625{
1626 xen_release_ptpage(pfn, PT_PMD);
1627}
1628
1629#if PAGETABLE_LEVELS == 4
1630static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
1631{
1632 xen_alloc_ptpage(mm, pfn, PT_PUD);
1633}
1634
1635static void xen_release_pud(unsigned long pfn)
1636{
1637 xen_release_ptpage(pfn, PT_PUD);
1638}
1639#endif
1640
1641void __init xen_reserve_top(void)
1642{
1643#ifdef CONFIG_X86_32
1644 unsigned long top = HYPERVISOR_VIRT_START;
1645 struct xen_platform_parameters pp;
1646
1647 if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
1648 top = pp.virt_start;
1649
1650 reserve_top_address(-top);
1651#endif /* CONFIG_X86_32 */
1652}
1653
1654/*
1655 * Like __va(), but returns the address in the kernel mapping (which is
1656 * all we have until the physical memory mapping has been set up).
1657 */
1658static void *__ka(phys_addr_t paddr)
1659{
1660#ifdef CONFIG_X86_64
1661 return (void *)(paddr + __START_KERNEL_map);
1662#else
1663 return __va(paddr);
1664#endif
1665}
1666
1667/* Convert a machine address to physical address */
1668static unsigned long m2p(phys_addr_t maddr)
1669{
1670 phys_addr_t paddr;
1671
1672 maddr &= PTE_PFN_MASK;
1673 paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
1674
1675 return paddr;
1676}
1677
1678/* Convert a machine address to kernel virtual */
1679static void *m2v(phys_addr_t maddr)
1680{
1681 return __ka(m2p(maddr));
1682}
1683
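/* Change the protection of a single kernel page by asking Xen to
   update its pte in place. */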
1684static void set_page_prot(void *addr, pgprot_t prot)
1685{
1686 unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
1687 pte_t pte = pfn_pte(pfn, prot);
1688
1689 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
1690 BUG();
1691}
1692
1693static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1694{
1695 unsigned pmdidx, pteidx;
1696 unsigned ident_pte;
1697 unsigned long pfn;
1698
1699 ident_pte = 0;
1700 pfn = 0;
1701 for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
1702 pte_t *pte_page;
1703
1704 /* Reuse or allocate a page of ptes */
1705 if (pmd_present(pmd[pmdidx]))
1706 pte_page = m2v(pmd[pmdidx].pmd);
1707 else {
1708 /* Check for free pte pages */
1709 if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
1710 break;
1711
1712 pte_page = &level1_ident_pgt[ident_pte];
1713 ident_pte += PTRS_PER_PTE;
1714
1715 pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
1716 }
1717
1718 /* Install mappings */
1719 for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
1720 pte_t pte;
1721
1722 if (pfn > max_pfn_mapped)
1723 max_pfn_mapped = pfn;
1724
1725 if (!pte_none(pte_page[pteidx]))
1726 continue;
1727
1728 pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
1729 pte_page[pteidx] = pte;
1730 }
1731 }
1732
1733 for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
1734 set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
1735
1736 set_page_prot(pmd, PAGE_KERNEL_RO);
1737}
1738
1739#ifdef CONFIG_X86_64
1740static void convert_pfn_mfn(void *v)
1741{
1742 pte_t *pte = v;
1743 int i;
1744
1745 /* All levels are converted the same way, so just treat them
1746 as ptes. */
1747 for (i = 0; i < PTRS_PER_PTE; i++)
1748 pte[i] = xen_make_pte(pte[i].pte);
1749}
1750
1751/*
1752 * Set up the initial kernel pagetable.
1753 *
1754 * We can construct this by grafting the Xen provided pagetable into
1755 * head_64.S's preconstructed pagetables. We copy the Xen L2's into
1756 * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This
1757 * means that only the kernel has a physical mapping to start with -
1758 * but that's enough to get __va working. We need to fill in the rest
1759 * of the physical mapping once some sort of allocator has been set
1760 * up.
1761 */
1762__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
1763 unsigned long max_pfn)
1764{
1765 pud_t *l3;
1766 pmd_t *l2;
1767
1768 /* Zap identity mapping */
1769 init_level4_pgt[0] = __pgd(0);
1770
1771 /* Pre-constructed entries are in pfn, so convert to mfn */
1772 convert_pfn_mfn(init_level4_pgt);
1773 convert_pfn_mfn(level3_ident_pgt);
1774 convert_pfn_mfn(level3_kernel_pgt);
1775
1776 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
1777 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
1778
1779 memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
1780 memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
1781
1782 l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
1783 l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
1784 memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
1785
1786 /* Set up identity map */
1787 xen_map_identity_early(level2_ident_pgt, max_pfn);
1788
1789 /* Make pagetable pieces RO */
1790 set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
1791 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
1792 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
1793 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
1794 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
1795 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
1796
1797 /* Pin down new L4 */
1798 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
1799 PFN_DOWN(__pa_symbol(init_level4_pgt)));
1800
1801 /* Unpin Xen-provided one */
1802 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1803
1804 /* Switch over */
1805 pgd = init_level4_pgt;
1806
1807 /*
1808 * At this stage there can be no user pgd, and no page
1809 * structure to attach it to, so make sure we just set kernel
1810 * pgd.
1811 */
1812 xen_mc_batch();
1813 __xen_write_cr3(true, __pa(pgd));
1814 xen_mc_issue(PARAVIRT_LAZY_CPU);
1815
1816 reserve_early(__pa(xen_start_info->pt_base),
1817 __pa(xen_start_info->pt_base +
1818 xen_start_info->nr_pt_frames * PAGE_SIZE),
1819 "XEN PAGETABLES");
1820
1821 return pgd;
1822}
1823#else /* !CONFIG_X86_64 */
1824static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
1825
1826__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
1827 unsigned long max_pfn)
1828{
1829 pmd_t *kernel_pmd;
1830
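	/* The kernel plus the Xen-provided pagetables (and a little
	   slack) are already mapped for us. */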
Jeremy Fitzhardinge93dbda72009-02-26 17:35:44 -08001831 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
1832 xen_start_info->nr_pt_frames * PAGE_SIZE +
1833 512*1024);
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001834
1835 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
1836 memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
1837
1838 xen_map_identity_early(level2_kernel_pgt, max_pfn);
1839
1840 memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
1841 set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
1842 __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
1843
1844 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
1845 set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
1846 set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
1847
1848 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1849
1850 xen_write_cr3(__pa(swapper_pg_dir));
1851
1852 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
1853
Jeremy Fitzhardinge33df4db2009-05-07 11:56:44 -07001854 reserve_early(__pa(xen_start_info->pt_base),
1855 __pa(xen_start_info->pt_base +
1856 xen_start_info->nr_pt_frames * PAGE_SIZE),
1857 "XEN PAGETABLES");
1858
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001859 return swapper_pg_dir;
1860}
1861#endif /* CONFIG_X86_64 */
1862
Masami Hiramatsu3b3809a2009-04-09 10:55:33 -07001863static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001864{
1865 pte_t pte;
1866
1867 phys >>= PAGE_SHIFT;
1868
1869 switch (idx) {
1870 case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
1871#ifdef CONFIG_X86_F00F_BUG
1872 case FIX_F00F_IDT:
1873#endif
1874#ifdef CONFIG_X86_32
1875 case FIX_WP_TEST:
1876 case FIX_VDSO:
1877# ifdef CONFIG_HIGHMEM
1878 case FIX_KMAP_BEGIN ... FIX_KMAP_END:
1879# endif
1880#else
1881 case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
1882#endif
1883#ifdef CONFIG_X86_LOCAL_APIC
1884 case FIX_APIC_BASE: /* maps dummy local APIC */
1885#endif
Jeremy Fitzhardinge3ecb1b72009-03-07 23:48:41 -08001886 case FIX_TEXT_POKE0:
1887 case FIX_TEXT_POKE1:
1888 /* All local page mappings */
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001889 pte = pfn_pte(phys, prot);
1890 break;
1891
Jeremy Fitzhardingec0011db2010-02-04 14:46:34 -08001892 case FIX_PARAVIRT_BOOTMAP:
1893 /* This is an MFN, but it isn't an IO mapping from the
1894 IO domain */
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001895 pte = mfn_pte(phys, prot);
1896 break;
Jeremy Fitzhardingec0011db2010-02-04 14:46:34 -08001897
1898 default:
1899 /* By default, set_fixmap is used for hardware mappings */
1900 pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP));
1901 break;
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001902 }
1903
1904 __native_set_fixmap(idx, pte);
1905
1906#ifdef CONFIG_X86_64
1907	/* Replicate the change so the vsyscall page is also mapped
1908	   into the user pagetable's vsyscall slot. */
1909 if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
1910 unsigned long vaddr = __fix_to_virt(idx);
1911 set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
1912 }
1913#endif
1914}
1915
Thomas Gleixnerf1d70622009-08-20 13:13:52 +02001916static __init void xen_post_allocator_init(void)
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001917{
1918 pv_mmu_ops.set_pte = xen_set_pte;
1919 pv_mmu_ops.set_pmd = xen_set_pmd;
1920 pv_mmu_ops.set_pud = xen_set_pud;
1921#if PAGETABLE_LEVELS == 4
1922 pv_mmu_ops.set_pgd = xen_set_pgd;
1923#endif
1924
1925 /* This will work as long as patching hasn't happened yet
1926 (which it hasn't) */
1927 pv_mmu_ops.alloc_pte = xen_alloc_pte;
1928 pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
1929 pv_mmu_ops.release_pte = xen_release_pte;
1930 pv_mmu_ops.release_pmd = xen_release_pmd;
1931#if PAGETABLE_LEVELS == 4
1932 pv_mmu_ops.alloc_pud = xen_alloc_pud;
1933 pv_mmu_ops.release_pud = xen_release_pud;
1934#endif
1935
1936#ifdef CONFIG_X86_64
1937 SetPagePinned(virt_to_page(level3_user_vsyscall));
1938#endif
1939 xen_mark_init_mm_pinned();
1940}
1941
Jeremy Fitzhardingeb407fc52009-02-17 23:46:21 -08001942static void xen_leave_lazy_mmu(void)
1943{
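	/* Flush any multicalls batched while in lazy MMU mode;
	   preemption is disabled so we flush this cpu's own batch. */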
Jeremy Fitzhardinge5caecb92009-02-20 23:01:26 -08001944 preempt_disable();
Jeremy Fitzhardingeb407fc52009-02-17 23:46:21 -08001945 xen_mc_flush();
1946 paravirt_leave_lazy_mmu();
Jeremy Fitzhardinge5caecb92009-02-20 23:01:26 -08001947 preempt_enable();
Jeremy Fitzhardingeb407fc52009-02-17 23:46:21 -08001948}
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001949
Thomas Gleixner030cb6c2009-08-20 14:30:02 +02001950static const struct pv_mmu_ops xen_mmu_ops __initdata = {
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001951 .read_cr2 = xen_read_cr2,
1952 .write_cr2 = xen_write_cr2,
1953
1954 .read_cr3 = xen_read_cr3,
1955 .write_cr3 = xen_write_cr3,
1956
1957 .flush_tlb_user = xen_flush_tlb,
1958 .flush_tlb_kernel = xen_flush_tlb,
1959 .flush_tlb_single = xen_flush_tlb_single,
1960 .flush_tlb_others = xen_flush_tlb_others,
1961
1962 .pte_update = paravirt_nop,
1963 .pte_update_defer = paravirt_nop,
1964
1965 .pgd_alloc = xen_pgd_alloc,
1966 .pgd_free = xen_pgd_free,
1967
1968 .alloc_pte = xen_alloc_pte_init,
1969 .release_pte = xen_release_pte_init,
Jeremy Fitzhardingeb96229b2009-03-17 13:30:55 -07001970 .alloc_pmd = xen_alloc_pmd_init,
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001971 .alloc_pmd_clone = paravirt_nop,
Jeremy Fitzhardingeb96229b2009-03-17 13:30:55 -07001972 .release_pmd = xen_release_pmd_init,
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001973
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001974#ifdef CONFIG_X86_64
1975 .set_pte = xen_set_pte,
1976#else
1977 .set_pte = xen_set_pte_init,
1978#endif
1979 .set_pte_at = xen_set_pte_at,
1980 .set_pmd = xen_set_pmd_hyper,
1981
1982 .ptep_modify_prot_start = __ptep_modify_prot_start,
1983 .ptep_modify_prot_commit = __ptep_modify_prot_commit,
1984
Jeremy Fitzhardingeda5de7c2009-01-28 14:35:07 -08001985 .pte_val = PV_CALLEE_SAVE(xen_pte_val),
1986 .pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001987
Jeremy Fitzhardingeda5de7c2009-01-28 14:35:07 -08001988 .make_pte = PV_CALLEE_SAVE(xen_make_pte),
1989 .make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001990
1991#ifdef CONFIG_X86_PAE
1992 .set_pte_atomic = xen_set_pte_atomic,
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08001993 .pte_clear = xen_pte_clear,
1994 .pmd_clear = xen_pmd_clear,
1995#endif /* CONFIG_X86_PAE */
1996 .set_pud = xen_set_pud_hyper,
1997
Jeremy Fitzhardingeda5de7c2009-01-28 14:35:07 -08001998 .make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
1999 .pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08002000
2001#if PAGETABLE_LEVELS == 4
Jeremy Fitzhardingeda5de7c2009-01-28 14:35:07 -08002002 .pud_val = PV_CALLEE_SAVE(xen_pud_val),
2003 .make_pud = PV_CALLEE_SAVE(xen_make_pud),
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08002004 .set_pgd = xen_set_pgd_hyper,
2005
Jeremy Fitzhardingeb96229b2009-03-17 13:30:55 -07002006 .alloc_pud = xen_alloc_pmd_init,
2007 .release_pud = xen_release_pmd_init,
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08002008#endif /* PAGETABLE_LEVELS == 4 */
2009
2010 .activate_mm = xen_activate_mm,
2011 .dup_mmap = xen_dup_mmap,
2012 .exit_mmap = xen_exit_mmap,
2013
2014 .lazy_mode = {
2015 .enter = paravirt_enter_lazy_mmu,
Jeremy Fitzhardingeb407fc52009-02-17 23:46:21 -08002016 .leave = xen_leave_lazy_mmu,
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08002017 },
2018
2019 .set_fixmap = xen_set_fixmap,
2020};
2021
Thomas Gleixner030cb6c2009-08-20 14:30:02 +02002022void __init xen_init_mmu_ops(void)
2023{
2024 x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
2025 x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
2026 pv_mmu_ops = xen_mmu_ops;
Jeremy Fitzhardinged2cb2142010-03-26 15:37:50 -07002027
2028 vmap_lazy_unmap = false;
Thomas Gleixner030cb6c2009-08-20 14:30:02 +02002029}
Jeremy Fitzhardinge319f3ba2009-01-28 14:35:01 -08002030
Alex Nixon08bbc9d2009-02-09 12:05:46 -08002031/* Protected by xen_reservation_lock. */
2032#define MAX_CONTIG_ORDER 9 /* 2MB */
2033static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
2034
2035#define VOID_PTE (mfn_pte(0, __pgprot(0)))
2036static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
2037 unsigned long *in_frames,
2038 unsigned long *out_frames)
2039{
2040 int i;
2041 struct multicall_space mcs;
2042
2043 xen_mc_batch();
2044 for (i = 0; i < (1UL<<order); i++, vaddr += PAGE_SIZE) {
2045 mcs = __xen_mc_entry(0);
2046
2047 if (in_frames)
2048 in_frames[i] = virt_to_mfn(vaddr);
2049
2050 MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
2051 set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
2052
2053 if (out_frames)
2054 out_frames[i] = virt_to_pfn(vaddr);
2055 }
2056 xen_mc_issue(0);
2057}
2058
2059/*
2060 * Update the pfn-to-mfn mappings for a virtual address range, either to
2061 * point to an array of mfns, or contiguously from a single starting
2062 * mfn.
2063 */
2064static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
2065 unsigned long *mfns,
2066 unsigned long first_mfn)
2067{
2068 unsigned i, limit;
2069 unsigned long mfn;
2070
2071 xen_mc_batch();
2072
2073 limit = 1u << order;
2074 for (i = 0; i < limit; i++, vaddr += PAGE_SIZE) {
2075 struct multicall_space mcs;
2076 unsigned flags;
2077
2078 mcs = __xen_mc_entry(0);
2079 if (mfns)
2080 mfn = mfns[i];
2081 else
2082 mfn = first_mfn + i;
2083
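		/* Only the last update in the batch requests a TLB
		   flush, so the whole range is flushed just once. */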
2084 if (i < (limit - 1))
2085 flags = 0;
2086 else {
2087 if (order == 0)
2088 flags = UVMF_INVLPG | UVMF_ALL;
2089 else
2090 flags = UVMF_TLB_FLUSH | UVMF_ALL;
2091 }
2092
2093 MULTI_update_va_mapping(mcs.mc, vaddr,
2094 mfn_pte(mfn, PAGE_KERNEL), flags);
2095
2096 set_phys_to_machine(virt_to_pfn(vaddr), mfn);
2097 }
2098
2099 xen_mc_issue(0);
2100}
2101
2102/*
2103 * Perform the hypercall to exchange a region of our pfns to point to
2104 * memory with the required contiguous alignment. Takes the pfns as
2105 * input, and populates mfns as output.
2106 *
2107 * Returns a success code indicating whether the hypervisor was able to
2108 * satisfy the request or not.
2109 */
2110static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
2111 unsigned long *pfns_in,
2112 unsigned long extents_out,
2113 unsigned int order_out,
2114 unsigned long *mfns_out,
2115 unsigned int address_bits)
2116{
2117 long rc;
2118 int success;
2119
2120 struct xen_memory_exchange exchange = {
2121 .in = {
2122 .nr_extents = extents_in,
2123 .extent_order = order_in,
2124 .extent_start = pfns_in,
2125 .domid = DOMID_SELF
2126 },
2127 .out = {
2128 .nr_extents = extents_out,
2129 .extent_order = order_out,
2130 .extent_start = mfns_out,
2131 .address_bits = address_bits,
2132 .domid = DOMID_SELF
2133 }
2134 };
2135
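	/* The exchange must cover the same total number of pages on
	   both sides. */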
2136 BUG_ON(extents_in << order_in != extents_out << order_out);
2137
2138 rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
2139 success = (exchange.nr_exchanged == extents_in);
2140
2141 BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
2142 BUG_ON(success && (rc != 0));
2143
2144 return success;
2145}
2146
2147int xen_create_contiguous_region(unsigned long vstart, unsigned int order,
2148 unsigned int address_bits)
2149{
2150 unsigned long *in_frames = discontig_frames, out_frame;
2151 unsigned long flags;
2152 int success;
2153
2154 /*
2155 * Currently an auto-translated guest will not perform I/O, nor will
2156 * it require PAE page directories below 4GB. Therefore any calls to
2157 * this function are redundant and can be ignored.
2158 */
2159
2160 if (xen_feature(XENFEAT_auto_translated_physmap))
2161 return 0;
2162
2163 if (unlikely(order > MAX_CONTIG_ORDER))
2164 return -ENOMEM;
2165
2166 memset((void *) vstart, 0, PAGE_SIZE << order);
2167
Alex Nixon08bbc9d2009-02-09 12:05:46 -08002168 spin_lock_irqsave(&xen_reservation_lock, flags);
2169
2170 /* 1. Zap current PTEs, remembering MFNs. */
2171 xen_zap_pfn_range(vstart, order, in_frames, NULL);
2172
2173 /* 2. Get a new contiguous memory extent. */
2174 out_frame = virt_to_pfn(vstart);
2175 success = xen_exchange_memory(1UL << order, 0, in_frames,
2176 1, order, &out_frame,
2177 address_bits);
2178
2179 /* 3. Map the new extent in place of old pages. */
2180 if (success)
2181 xen_remap_exchanged_ptes(vstart, order, NULL, out_frame);
2182 else
2183 xen_remap_exchanged_ptes(vstart, order, in_frames, 0);
2184
2185 spin_unlock_irqrestore(&xen_reservation_lock, flags);
2186
2187 return success ? 0 : -ENOMEM;
2188}
2189EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
2190
2191void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
2192{
2193 unsigned long *out_frames = discontig_frames, in_frame;
2194 unsigned long flags;
2195 int success;
2196
2197 if (xen_feature(XENFEAT_auto_translated_physmap))
2198 return;
2199
2200 if (unlikely(order > MAX_CONTIG_ORDER))
2201 return;
2202
2203 memset((void *) vstart, 0, PAGE_SIZE << order);
2204
Alex Nixon08bbc9d2009-02-09 12:05:46 -08002205 spin_lock_irqsave(&xen_reservation_lock, flags);
2206
2207 /* 1. Find start MFN of contiguous extent. */
2208 in_frame = virt_to_mfn(vstart);
2209
2210 /* 2. Zap current PTEs. */
2211 xen_zap_pfn_range(vstart, order, NULL, out_frames);
2212
2213 /* 3. Do the exchange for non-contiguous MFNs. */
2214 success = xen_exchange_memory(1, order, &in_frame, 1UL << order,
2215 0, out_frames, 0);
2216
2217 /* 4. Map new pages in place of old pages. */
2218 if (success)
2219 xen_remap_exchanged_ptes(vstart, order, out_frames, 0);
2220 else
2221 xen_remap_exchanged_ptes(vstart, order, NULL, in_frame);
2222
2223 spin_unlock_irqrestore(&xen_reservation_lock, flags);
2224}
2225EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
2226
Jeremy Fitzhardinge994025c2008-08-20 17:02:19 -07002227#ifdef CONFIG_XEN_DEBUG_FS
2228
2229static struct dentry *d_mmu_debug;
2230
2231static int __init xen_mmu_debugfs(void)
2232{
2233 struct dentry *d_xen = xen_init_debugfs();
2234
2235 if (d_xen == NULL)
2236 return -ENOMEM;
2237
2238 d_mmu_debug = debugfs_create_dir("mmu", d_xen);
2239
2240 debugfs_create_u8("zero_stats", 0644, d_mmu_debug, &zero_stats);
2241
2242 debugfs_create_u32("pgd_update", 0444, d_mmu_debug, &mmu_stats.pgd_update);
2243 debugfs_create_u32("pgd_update_pinned", 0444, d_mmu_debug,
2244 &mmu_stats.pgd_update_pinned);
2245 debugfs_create_u32("pgd_update_batched", 0444, d_mmu_debug,
2246 &mmu_stats.pgd_update_pinned);
2247
2248 debugfs_create_u32("pud_update", 0444, d_mmu_debug, &mmu_stats.pud_update);
2249 debugfs_create_u32("pud_update_pinned", 0444, d_mmu_debug,
2250 &mmu_stats.pud_update_pinned);
2251 debugfs_create_u32("pud_update_batched", 0444, d_mmu_debug,
2252 &mmu_stats.pud_update_pinned);
2253
2254 debugfs_create_u32("pmd_update", 0444, d_mmu_debug, &mmu_stats.pmd_update);
2255 debugfs_create_u32("pmd_update_pinned", 0444, d_mmu_debug,
2256 &mmu_stats.pmd_update_pinned);
2257 debugfs_create_u32("pmd_update_batched", 0444, d_mmu_debug,
2258 &mmu_stats.pmd_update_pinned);
2259
2260 debugfs_create_u32("pte_update", 0444, d_mmu_debug, &mmu_stats.pte_update);
2261// debugfs_create_u32("pte_update_pinned", 0444, d_mmu_debug,
2262// &mmu_stats.pte_update_pinned);
2263 debugfs_create_u32("pte_update_batched", 0444, d_mmu_debug,
2264 &mmu_stats.pte_update_pinned);
2265
2266 debugfs_create_u32("mmu_update", 0444, d_mmu_debug, &mmu_stats.mmu_update);
2267 debugfs_create_u32("mmu_update_extended", 0444, d_mmu_debug,
2268 &mmu_stats.mmu_update_extended);
2269 xen_debugfs_create_u32_array("mmu_update_histo", 0444, d_mmu_debug,
2270 mmu_stats.mmu_update_histo, 20);
2271
2272 debugfs_create_u32("set_pte_at", 0444, d_mmu_debug, &mmu_stats.set_pte_at);
2273 debugfs_create_u32("set_pte_at_batched", 0444, d_mmu_debug,
2274 &mmu_stats.set_pte_at_batched);
2275 debugfs_create_u32("set_pte_at_current", 0444, d_mmu_debug,
2276 &mmu_stats.set_pte_at_current);
2277 debugfs_create_u32("set_pte_at_kernel", 0444, d_mmu_debug,
2278 &mmu_stats.set_pte_at_kernel);
2279
2280 debugfs_create_u32("prot_commit", 0444, d_mmu_debug, &mmu_stats.prot_commit);
2281 debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
2282 &mmu_stats.prot_commit_batched);
2283
2284 return 0;
2285}
2286fs_initcall(xen_mmu_debugfs);
2287
2288#endif /* CONFIG_XEN_DEBUG_FS */