/*
 * Xen leaves the responsibility for maintaining p2m mappings to the
 * guests themselves, but it must also access and update the p2m array
 * during suspend/resume when all the pages are reallocated.
 *
 * The p2m table is logically a flat array, but we implement it as a
 * three-level tree to allow the address space to be sparse.
 *
 *                         Xen
 *                          |
 *      p2m_top                      p2m_top_mfn
 *        /  \                          /    \
 * p2m_mid p2m_mid            p2m_mid_mfn p2m_mid_mfn
 *   / \     / \                 /           /
 * p2m p2m p2m p2m            p2m p2m p2m ...
 *
 * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
 *
 * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the
 * maximum representable pseudo-physical address space is:
 *  P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
 *
 * P2M_PER_PAGE depends on the architecture, as an mfn is always
 * unsigned long (8 bytes on 64-bit, 4 bytes on 32-bit), leading to
 * 512 and 1024 entries respectively.
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/hash.h>

#include <asm/cache.h>
#include <asm/setup.h>

#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>

#include "xen-ops.h"

static void __init m2p_override_init(void);

unsigned long xen_max_p2m_pfn __read_mostly;

#define P2M_PER_PAGE		(PAGE_SIZE / sizeof(unsigned long))
#define P2M_MID_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long *))
#define P2M_TOP_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long **))

#define MAX_P2M_PFN		(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
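
/*
 * Concretely, with 4 KiB pages: on 64-bit each level holds 512 entries,
 * so MAX_P2M_PFN = 512^3 = 2^27 pfns (512 GiB of pseudo-physical address
 * space); on 32-bit each level holds 1024 entries, giving 2^30 pfns.
 */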

/* Placeholders for holes in the address space */
static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE);

static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);

RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));

static inline unsigned p2m_top_index(unsigned long pfn)
{
	BUG_ON(pfn >= MAX_P2M_PFN);
	return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
}

static inline unsigned p2m_mid_index(unsigned long pfn)
{
	return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
}

static inline unsigned p2m_index(unsigned long pfn)
{
	return pfn % P2M_PER_PAGE;
}
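
/*
 * Worked example of the decomposition above (64-bit, so P2M_PER_PAGE =
 * P2M_MID_PER_PAGE = 512): for pfn 0x12345,
 *
 *	topidx = 0x12345 / (512 * 512)  = 0
 *	mididx = (0x12345 / 512) % 512  = 145
 *	idx    = 0x12345 % 512          = 325
 *
 * so the mfn for that pfn is stored at p2m_top[0][145][325].
 */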

static void p2m_top_init(unsigned long ***top)
{
	unsigned i;

	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
		top[i] = p2m_mid_missing;
}

static void p2m_top_mfn_init(unsigned long *top)
{
	unsigned i;

	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
		top[i] = virt_to_mfn(p2m_mid_missing_mfn);
}

static void p2m_top_mfn_p_init(unsigned long **top)
{
	unsigned i;

	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
		top[i] = p2m_mid_missing_mfn;
}

static void p2m_mid_init(unsigned long **mid)
{
	unsigned i;

	for (i = 0; i < P2M_MID_PER_PAGE; i++)
		mid[i] = p2m_missing;
}

static void p2m_mid_mfn_init(unsigned long *mid)
{
	unsigned i;

	for (i = 0; i < P2M_MID_PER_PAGE; i++)
		mid[i] = virt_to_mfn(p2m_missing);
}

static void p2m_init(unsigned long *p2m)
{
	unsigned i;

	for (i = 0; i < P2M_PER_PAGE; i++)
		p2m[i] = INVALID_P2M_ENTRY;
}

/*
 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
 *
 * This is called both at boot time, and after resuming from suspend:
 * - At boot time we're called very early, and must use extend_brk()
 *   to allocate memory.
 *
 * - After resume we're called from within stop_machine, but the mfn
 *   tree should already be completely allocated.
 */
void xen_build_mfn_list_list(void)
{
	unsigned long pfn;

	/* Pre-initialize p2m_top_mfn to be completely missing */
	if (p2m_top_mfn == NULL) {
		p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_mid_mfn_init(p2m_mid_missing_mfn);

		p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_top_mfn_p_init(p2m_top_mfn_p);

		p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_top_mfn_init(p2m_top_mfn);
	} else {
		/* Reinitialise: the mfns all change after migration */
		p2m_mid_mfn_init(p2m_mid_missing_mfn);
	}

	for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
		unsigned topidx = p2m_top_index(pfn);
		unsigned mididx = p2m_mid_index(pfn);
		unsigned long **mid;
		unsigned long *mid_mfn_p;

		mid = p2m_top[topidx];
		mid_mfn_p = p2m_top_mfn_p[topidx];

		/*
		 * Don't bother allocating any mfn mid levels if they're
		 * just missing; just update the stored mfn, since all of
		 * them could have changed over a migration.
		 */
		if (mid == p2m_mid_missing) {
			BUG_ON(mididx);
			BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
			p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
			pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE;
			continue;
		}

		if (mid_mfn_p == p2m_mid_missing_mfn) {
			/*
			 * XXX boot-time only!  We should never find
			 * missing parts of the mfn tree after
			 * runtime.  extend_brk() will BUG if we call
			 * it too late.
			 */
			mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
			p2m_mid_mfn_init(mid_mfn_p);

			p2m_top_mfn_p[topidx] = mid_mfn_p;
		}

		p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
		mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]);
	}
}

void xen_setup_mfn_list_list(void)
{
	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);

	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
		virt_to_mfn(p2m_top_mfn);
	HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn;
}

/* Set up p2m_top to point to the domain-builder provided p2m pages */
void __init xen_build_dynamic_phys_to_machine(void)
{
	unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
	unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
	unsigned long pfn;

	xen_max_p2m_pfn = max_pfn;

	p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
	p2m_init(p2m_missing);

	p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
	p2m_mid_init(p2m_mid_missing);

	p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
	p2m_top_init(p2m_top);

	/*
	 * The domain builder gives us a pre-constructed p2m array in
	 * mfn_list for all the pages initially given to us, so we just
	 * need to graft that into our tree structure.
	 */
	for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
		unsigned topidx = p2m_top_index(pfn);
		unsigned mididx = p2m_mid_index(pfn);

		if (p2m_top[topidx] == p2m_mid_missing) {
			unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
			p2m_mid_init(mid);

			p2m_top[topidx] = mid;
		}

		p2m_top[topidx][mididx] = &mfn_list[pfn];
	}

	m2p_override_init();
}

unsigned long get_phys_to_machine(unsigned long pfn)
{
	unsigned topidx, mididx, idx;

	if (unlikely(pfn >= MAX_P2M_PFN))
		return INVALID_P2M_ENTRY;

	topidx = p2m_top_index(pfn);
	mididx = p2m_mid_index(pfn);
	idx = p2m_index(pfn);

	return p2m_top[topidx][mididx][idx];
}
EXPORT_SYMBOL_GPL(get_phys_to_machine);
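
/*
 * A minimal lookup sketch: callers normally go through pfn_to_mfn()
 * from <asm/xen/page.h>, which wraps this function and masks off
 * FOREIGN_FRAME_BIT from the raw entry returned here:
 *
 *	unsigned long entry = get_phys_to_machine(pfn);
 *	if (entry != INVALID_P2M_ENTRY)
 *		mfn = entry & ~FOREIGN_FRAME_BIT;
 */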

static void *alloc_p2m_page(void)
{
	return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
}

static void free_p2m_page(void *p)
{
	free_page((unsigned long)p);
}

/*
 * Fully allocate the p2m structure for a given pfn.  We need to check
 * that both the top and mid levels are allocated, and make sure the
 * parallel mfn tree is kept in sync.  We may race with other cpus, so
 * the new pages are installed with cmpxchg; if we lose the race then
 * simply free the page we allocated and use the one that's there.
 */
static bool alloc_p2m(unsigned long pfn)
{
	unsigned topidx, mididx;
	unsigned long ***top_p, **mid;
	unsigned long *top_mfn_p, *mid_mfn;

	topidx = p2m_top_index(pfn);
	mididx = p2m_mid_index(pfn);

	top_p = &p2m_top[topidx];
	mid = *top_p;

	if (mid == p2m_mid_missing) {
		/* Mid level is missing, allocate a new one */
		mid = alloc_p2m_page();
		if (!mid)
			return false;

		p2m_mid_init(mid);

		if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
			free_p2m_page(mid);
	}

	top_mfn_p = &p2m_top_mfn[topidx];
	mid_mfn = p2m_top_mfn_p[topidx];

	BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);

	if (mid_mfn == p2m_mid_missing_mfn) {
		/* Separately check the mid mfn level */
		unsigned long missing_mfn;
		unsigned long mid_mfn_mfn;

		mid_mfn = alloc_p2m_page();
		if (!mid_mfn)
			return false;

		p2m_mid_mfn_init(mid_mfn);

		missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
		mid_mfn_mfn = virt_to_mfn(mid_mfn);
		if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn)
			free_p2m_page(mid_mfn);
		else
			p2m_top_mfn_p[topidx] = mid_mfn;
	}

	if (p2m_top[topidx][mididx] == p2m_missing) {
		/* p2m leaf page is missing */
		unsigned long *p2m;

		p2m = alloc_p2m_page();
		if (!p2m)
			return false;

		p2m_init(p2m);

		if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
			free_p2m_page(p2m);
		else
			mid_mfn[mididx] = virt_to_mfn(p2m);
	}

	return true;
}

/* Try to install a p2m mapping; fail if the intermediate levels are missing */
bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
	unsigned topidx, mididx, idx;

	if (unlikely(pfn >= MAX_P2M_PFN)) {
		BUG_ON(mfn != INVALID_P2M_ENTRY);
		return true;
	}

	topidx = p2m_top_index(pfn);
	mididx = p2m_mid_index(pfn);
	idx = p2m_index(pfn);

	if (p2m_top[topidx][mididx] == p2m_missing)
		return mfn == INVALID_P2M_ENTRY;

	p2m_top[topidx][mididx][idx] = mfn;

	return true;
}

bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
		return true;
	}

	if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
		if (!alloc_p2m(pfn))
			return false;

		if (!__set_phys_to_machine(pfn, mfn))
			return false;
	}

	return true;
}
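
/*
 * A hedged usage sketch (not taken from this file): a caller such as
 * the balloon driver, having handed a frame back to Xen, records that
 * the pfn no longer has a backing mfn:
 *
 *	if (!set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
 *		BUG();
 */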

#define M2P_OVERRIDE_HASH_SHIFT	10
#define M2P_OVERRIDE_HASH	(1 << M2P_OVERRIDE_HASH_SHIFT)

static RESERVE_BRK_ARRAY(struct list_head, m2p_overrides, M2P_OVERRIDE_HASH);
static DEFINE_SPINLOCK(m2p_override_lock);

static void __init m2p_override_init(void)
{
	unsigned i;

	m2p_overrides = extend_brk(sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH,
				   sizeof(unsigned long));

	for (i = 0; i < M2P_OVERRIDE_HASH; i++)
		INIT_LIST_HEAD(&m2p_overrides[i]);
}

static unsigned long mfn_hash(unsigned long mfn)
{
	return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT);
}

/* Add an MFN override for a particular page */
void m2p_add_override(unsigned long mfn, struct page *page)
{
	unsigned long flags;
	unsigned long pfn = page_to_pfn(page);

	/*
	 * Stash the foreign mfn in page->private, and remember the
	 * original p2m entry in page->index so m2p_remove_override()
	 * can restore it later.
	 */
	page->private = mfn;
	page->index = pfn_to_mfn(pfn);

	__set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));

	spin_lock_irqsave(&m2p_override_lock, flags);
	list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
	spin_unlock_irqrestore(&m2p_override_lock, flags);
}

void m2p_remove_override(struct page *page)
{
	unsigned long flags;
	unsigned long mfn;
	unsigned long pfn;

	pfn = page_to_pfn(page);
	mfn = get_phys_to_machine(pfn);

	/* Nothing to undo unless the pfn really is overridden */
	if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT))
		return;

	spin_lock_irqsave(&m2p_override_lock, flags);
	list_del(&page->lru);
	spin_unlock_irqrestore(&m2p_override_lock, flags);

	/* Restore the original p2m entry saved by m2p_add_override() */
	__set_phys_to_machine(pfn, page->index);
}

struct page *m2p_find_override(unsigned long mfn)
{
	unsigned long flags;
	struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)];
	struct page *p, *ret;

	ret = NULL;

	spin_lock_irqsave(&m2p_override_lock, flags);

	list_for_each_entry(p, bucket, lru) {
		if (p->private == mfn) {
			ret = p;
			break;
		}
	}

	spin_unlock_irqrestore(&m2p_override_lock, flags);

	return ret;
}

unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
{
	struct page *p = m2p_find_override(mfn);
	unsigned long ret = pfn;

	if (p)
		ret = page_to_pfn(p);

	return ret;
}
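
/*
 * A hedged lifecycle sketch for the override table (a grant-mapping
 * backend is the expected caller; the variable names here are purely
 * illustrative): after mapping a foreign frame into a local page,
 * register the override so reverse (m2p) lookups resolve to the local
 * pfn, and undo it before unmapping:
 *
 *	m2p_add_override(foreign_mfn, page);
 *	...
 *	pfn = m2p_find_override_pfn(foreign_mfn, default_pfn);
 *	...
 *	m2p_remove_override(page);
 */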