/*
2 * GTT virtualization
3 *
4 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Zhi Wang <zhi.a.wang@intel.com>
27 * Zhenyu Wang <zhenyuw@linux.intel.com>
28 * Xiao Zheng <xiao.zheng@intel.com>
29 *
30 * Contributors:
31 * Min He <min.he@intel.com>
32 * Bing Niu <bing.niu@intel.com>
33 *
34 */
35
36#include "i915_drv.h"
37#include "trace.h"
38
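/*
 * When enable_out_of_sync is set, a guest PTE page that keeps being
 * written is taken out of write protection ("out of sync") and
 * re-synchronized in batch right before the next workload submission,
 * using a pool of preallocated_oos_pages staging pages (see the OOS
 * helpers below).
 */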
39static bool enable_out_of_sync = false;
40static int preallocated_oos_pages = 8192;
41
/*
 * validate a vGPU graphics memory address and the related range size
 */
bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
{
	if (!vgpu_gmadr_is_valid(vgpu, addr) || (size
			&& !vgpu_gmadr_is_valid(vgpu, addr + size - 1))) {
		gvt_err("vgpu%d: invalid range gmadr 0x%llx size 0x%x\n",
			vgpu->id, addr, size);
		return false;
	}
	return true;
}
56
57/* translate a guest gmadr to host gmadr */
58int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr)
59{
60 if (WARN(!vgpu_gmadr_is_valid(vgpu, g_addr),
61 "invalid guest gmadr %llx\n", g_addr))
62 return -EACCES;
63
64 if (vgpu_gmadr_is_aperture(vgpu, g_addr))
65 *h_addr = vgpu_aperture_gmadr_base(vgpu)
66 + (g_addr - vgpu_aperture_offset(vgpu));
67 else
68 *h_addr = vgpu_hidden_gmadr_base(vgpu)
69 + (g_addr - vgpu_hidden_offset(vgpu));
70 return 0;
71}
72
73/* translate a host gmadr to guest gmadr */
74int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr)
75{
76 if (WARN(!gvt_gmadr_is_valid(vgpu->gvt, h_addr),
77 "invalid host gmadr %llx\n", h_addr))
78 return -EACCES;
79
80 if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr))
81 *g_addr = vgpu_aperture_gmadr_base(vgpu)
82 + (h_addr - gvt_aperture_gmadr_base(vgpu->gvt));
83 else
84 *g_addr = vgpu_hidden_gmadr_base(vgpu)
85 + (h_addr - gvt_hidden_gmadr_base(vgpu->gvt));
86 return 0;
87}
88
89int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
90 unsigned long *h_index)
91{
92 u64 h_addr;
93 int ret;
94
95 ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << GTT_PAGE_SHIFT,
96 &h_addr);
97 if (ret)
98 return ret;
99
100 *h_index = h_addr >> GTT_PAGE_SHIFT;
101 return 0;
102}
103
104int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
105 unsigned long *g_index)
106{
107 u64 g_addr;
108 int ret;
109
110 ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << GTT_PAGE_SHIFT,
111 &g_addr);
112 if (ret)
113 return ret;
114
115 *g_index = g_addr >> GTT_PAGE_SHIFT;
116 return 0;
117}
118
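/*
 * Illustrative sketch only (this helper is hypothetical and not used by
 * any GVT code path): a valid guest GGTT page index translated to the
 * host index and back must map onto itself. It relies solely on the
 * translation helpers above.
 */
static int __maybe_unused intel_gvt_ggtt_index_round_trip(
		struct intel_vgpu *vgpu, unsigned long g_index)
{
	unsigned long h_index, g_check;
	int ret;

	ret = intel_gvt_ggtt_index_g2h(vgpu, g_index, &h_index);
	if (ret)
		return ret;

	ret = intel_gvt_ggtt_h2g_index(vgpu, h_index, &g_check);
	if (ret)
		return ret;

	/* For a valid guest index the round trip is the identity. */
	WARN_ON(g_check != g_index);
	return 0;
}
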
119#define gtt_type_is_entry(type) \
120 (type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
121 && type != GTT_TYPE_PPGTT_PTE_ENTRY \
122 && type != GTT_TYPE_PPGTT_ROOT_ENTRY)
123
124#define gtt_type_is_pt(type) \
125 (type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)
126
127#define gtt_type_is_pte_pt(type) \
128 (type == GTT_TYPE_PPGTT_PTE_PT)
129
130#define gtt_type_is_root_pointer(type) \
131 (gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)
132
133#define gtt_init_entry(e, t, p, v) do { \
134 (e)->type = t; \
135 (e)->pdev = p; \
136 memcpy(&(e)->val64, &v, sizeof(v)); \
137} while (0)
138
139enum {
140 GTT_TYPE_INVALID = -1,
141
142 GTT_TYPE_GGTT_PTE,
143
144 GTT_TYPE_PPGTT_PTE_4K_ENTRY,
145 GTT_TYPE_PPGTT_PTE_2M_ENTRY,
146 GTT_TYPE_PPGTT_PTE_1G_ENTRY,
147
148 GTT_TYPE_PPGTT_PTE_ENTRY,
149
150 GTT_TYPE_PPGTT_PDE_ENTRY,
151 GTT_TYPE_PPGTT_PDP_ENTRY,
152 GTT_TYPE_PPGTT_PML4_ENTRY,
153
154 GTT_TYPE_PPGTT_ROOT_ENTRY,
155
156 GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
157 GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
158
159 GTT_TYPE_PPGTT_ENTRY,
160
161 GTT_TYPE_PPGTT_PTE_PT,
162 GTT_TYPE_PPGTT_PDE_PT,
163 GTT_TYPE_PPGTT_PDP_PT,
164 GTT_TYPE_PPGTT_PML4_PT,
165
166 GTT_TYPE_MAX,
167};
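
/*
 * The ordering of this enum is significant: the gtt_type_is_*() macros
 * above rely on entry types sorting below GTT_TYPE_PPGTT_ENTRY and on
 * page table types sorting between GTT_TYPE_PPGTT_PTE_PT and
 * GTT_TYPE_MAX.
 */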
168
/*
 * Mappings between GTT_TYPE* enumerations.
 * For a given type, the table below provides:
 * - the type of the next-level page table
 * - the type of an entry inside this level of page table
 * - the entry type once the PSE bit is set
 *
 * When a combination does not exist, GTT_TYPE_INVALID is returned:
 * e.g. an L4 root entry has no PSE form, and a PTE page table has no
 * next-level page table. Getting GTT_TYPE_INVALID back is what
 * terminates a page table walk.
 */
184
185struct gtt_type_table_entry {
186 int entry_type;
187 int next_pt_type;
188 int pse_entry_type;
189};
190
191#define GTT_TYPE_TABLE_ENTRY(type, e_type, npt_type, pse_type) \
192 [type] = { \
193 .entry_type = e_type, \
194 .next_pt_type = npt_type, \
195 .pse_entry_type = pse_type, \
196 }
197
198static struct gtt_type_table_entry gtt_type_table[] = {
199 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
200 GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
201 GTT_TYPE_PPGTT_PML4_PT,
202 GTT_TYPE_INVALID),
203 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
204 GTT_TYPE_PPGTT_PML4_ENTRY,
205 GTT_TYPE_PPGTT_PDP_PT,
206 GTT_TYPE_INVALID),
207 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
208 GTT_TYPE_PPGTT_PML4_ENTRY,
209 GTT_TYPE_PPGTT_PDP_PT,
210 GTT_TYPE_INVALID),
211 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
212 GTT_TYPE_PPGTT_PDP_ENTRY,
213 GTT_TYPE_PPGTT_PDE_PT,
214 GTT_TYPE_PPGTT_PTE_1G_ENTRY),
215 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
216 GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
217 GTT_TYPE_PPGTT_PDE_PT,
218 GTT_TYPE_PPGTT_PTE_1G_ENTRY),
219 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
220 GTT_TYPE_PPGTT_PDP_ENTRY,
221 GTT_TYPE_PPGTT_PDE_PT,
222 GTT_TYPE_PPGTT_PTE_1G_ENTRY),
223 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
224 GTT_TYPE_PPGTT_PDE_ENTRY,
225 GTT_TYPE_PPGTT_PTE_PT,
226 GTT_TYPE_PPGTT_PTE_2M_ENTRY),
227 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
228 GTT_TYPE_PPGTT_PDE_ENTRY,
229 GTT_TYPE_PPGTT_PTE_PT,
230 GTT_TYPE_PPGTT_PTE_2M_ENTRY),
231 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
232 GTT_TYPE_PPGTT_PTE_4K_ENTRY,
233 GTT_TYPE_INVALID,
234 GTT_TYPE_INVALID),
235 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
236 GTT_TYPE_PPGTT_PTE_4K_ENTRY,
237 GTT_TYPE_INVALID,
238 GTT_TYPE_INVALID),
239 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
240 GTT_TYPE_PPGTT_PDE_ENTRY,
241 GTT_TYPE_INVALID,
242 GTT_TYPE_PPGTT_PTE_2M_ENTRY),
243 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
244 GTT_TYPE_PPGTT_PDP_ENTRY,
245 GTT_TYPE_INVALID,
246 GTT_TYPE_PPGTT_PTE_1G_ENTRY),
247 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
248 GTT_TYPE_GGTT_PTE,
249 GTT_TYPE_INVALID,
250 GTT_TYPE_INVALID),
251};
252
253static inline int get_next_pt_type(int type)
254{
255 return gtt_type_table[type].next_pt_type;
256}
257
258static inline int get_entry_type(int type)
259{
260 return gtt_type_table[type].entry_type;
261}
262
263static inline int get_pse_type(int type)
264{
265 return gtt_type_table[type].pse_entry_type;
266}
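
/*
 * Example: starting from GTT_TYPE_PPGTT_ROOT_L4_ENTRY, repeated
 * get_next_pt_type() lookups yield PML4 PT -> PDP PT -> PDE PT ->
 * PTE PT, and finally GTT_TYPE_INVALID, which is what terminates a
 * page table walk at the leaf level.
 */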
267
static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index)
{
	void *addr = (u64 *)dev_priv->ggtt.gsm + index;
	u64 pte;

#ifdef readq
	pte = readq(addr);
#else
	pte = ioread32(addr);
	pte |= (u64)ioread32(addr + 4) << 32;
#endif
	return pte;
}
281
282static void write_pte64(struct drm_i915_private *dev_priv,
283 unsigned long index, u64 pte)
284{
285 void *addr = (u64 *)dev_priv->ggtt.gsm + index;
286
287#ifdef writeq
288 writeq(pte, addr);
289#else
290 iowrite32((u32)pte, addr);
291 iowrite32(pte >> 32, addr + 4);
292#endif
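	/* Flush the GGTT update so the GPU observes the new PTE. */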
293 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
294 POSTING_READ(GFX_FLSH_CNTL_GEN6);
295}
296
297static inline struct intel_gvt_gtt_entry *gtt_get_entry64(void *pt,
298 struct intel_gvt_gtt_entry *e,
299 unsigned long index, bool hypervisor_access, unsigned long gpa,
300 struct intel_vgpu *vgpu)
301{
302 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
303 int ret;
304
305 if (WARN_ON(info->gtt_entry_size != 8))
306 return e;
307
308 if (hypervisor_access) {
309 ret = intel_gvt_hypervisor_read_gpa(vgpu, gpa +
310 (index << info->gtt_entry_size_shift),
311 &e->val64, 8);
312 WARN_ON(ret);
313 } else if (!pt) {
314 e->val64 = read_pte64(vgpu->gvt->dev_priv, index);
315 } else {
316 e->val64 = *((u64 *)pt + index);
317 }
318 return e;
319}
320
321static inline struct intel_gvt_gtt_entry *gtt_set_entry64(void *pt,
322 struct intel_gvt_gtt_entry *e,
323 unsigned long index, bool hypervisor_access, unsigned long gpa,
324 struct intel_vgpu *vgpu)
325{
326 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
327 int ret;
328
329 if (WARN_ON(info->gtt_entry_size != 8))
330 return e;
331
332 if (hypervisor_access) {
333 ret = intel_gvt_hypervisor_write_gpa(vgpu, gpa +
334 (index << info->gtt_entry_size_shift),
335 &e->val64, 8);
336 WARN_ON(ret);
337 } else if (!pt) {
338 write_pte64(vgpu->gvt->dev_priv, index, e->val64);
339 } else {
340 *((u64 *)pt + index) = e->val64;
341 }
342 return e;
343}
344
345#define GTT_HAW 46
346
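/*
 * GTT_HAW is the physical address width handled here (46 bits on gen8).
 * The masks below extract the page frame address bits of an entry for
 * the 1GB, 2MB and 4KB page sizes respectively.
 */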
347#define ADDR_1G_MASK (((1UL << (GTT_HAW - 30 + 1)) - 1) << 30)
348#define ADDR_2M_MASK (((1UL << (GTT_HAW - 21 + 1)) - 1) << 21)
349#define ADDR_4K_MASK (((1UL << (GTT_HAW - 12 + 1)) - 1) << 12)
350
351static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
352{
353 unsigned long pfn;
354
355 if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
356 pfn = (e->val64 & ADDR_1G_MASK) >> 12;
357 else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
358 pfn = (e->val64 & ADDR_2M_MASK) >> 12;
359 else
360 pfn = (e->val64 & ADDR_4K_MASK) >> 12;
361 return pfn;
362}
363
364static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
365{
366 if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
367 e->val64 &= ~ADDR_1G_MASK;
368 pfn &= (ADDR_1G_MASK >> 12);
369 } else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
370 e->val64 &= ~ADDR_2M_MASK;
371 pfn &= (ADDR_2M_MASK >> 12);
372 } else {
373 e->val64 &= ~ADDR_4K_MASK;
374 pfn &= (ADDR_4K_MASK >> 12);
375 }
376
377 e->val64 |= (pfn << 12);
378}
379
380static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
381{
382 /* Entry doesn't have PSE bit. */
383 if (get_pse_type(e->type) == GTT_TYPE_INVALID)
384 return false;
385
386 e->type = get_entry_type(e->type);
387 if (!(e->val64 & (1 << 7)))
388 return false;
389
390 e->type = get_pse_type(e->type);
391 return true;
392}
393
394static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
395{
	/*
	 * i915 programs the PDP root pointer registers without setting the
	 * present bit, and that still works, so a root pointer entry has to
	 * be treated as present whenever it is non-zero.
	 */
401 if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
402 || e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
403 return (e->val64 != 0);
404 else
405 return (e->val64 & (1 << 0));
406}
407
408static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
409{
410 e->val64 &= ~(1 << 0);
411}
412
413/*
414 * Per-platform GMA routines.
415 */
416static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
417{
418 unsigned long x = (gma >> GTT_PAGE_SHIFT);
419
420 trace_gma_index(__func__, gma, x);
421 return x;
422}
423
424#define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
425static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
426{ \
427 unsigned long x = (exp); \
428 trace_gma_index(__func__, gma, x); \
429 return x; \
430}
431
432DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
433DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
434DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
435DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
436DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
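
/*
 * The gen8 GMA decomposition implemented above: in a 4-level walk,
 * bits 47:39 select the PML4 entry, 38:30 the PDP entry, 29:21 the PDE
 * and 20:12 the PTE, with bits 11:0 as the page offset. A 3-level
 * (legacy 32-bit) walk uses only bits 31:30 as the PDP index.
 */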
437
438static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
439 .get_entry = gtt_get_entry64,
440 .set_entry = gtt_set_entry64,
441 .clear_present = gtt_entry_clear_present,
442 .test_present = gen8_gtt_test_present,
443 .test_pse = gen8_gtt_test_pse,
444 .get_pfn = gen8_gtt_get_pfn,
445 .set_pfn = gen8_gtt_set_pfn,
446};
447
448static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
449 .gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
450 .gma_to_pte_index = gen8_gma_to_pte_index,
451 .gma_to_pde_index = gen8_gma_to_pde_index,
452 .gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
453 .gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
454 .gma_to_pml4_index = gen8_gma_to_pml4_index,
455};
456
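/*
 * Translate a guest entry into its shadow form: the guest page frame
 * number is converted to the host machine frame number through the
 * hypervisor, everything else is copied verbatim.
 */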
457static int gtt_entry_p2m(struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *p,
458 struct intel_gvt_gtt_entry *m)
459{
460 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
461 unsigned long gfn, mfn;
462
463 *m = *p;
464
465 if (!ops->test_present(p))
466 return 0;
467
468 gfn = ops->get_pfn(p);
469
470 mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn);
471 if (mfn == INTEL_GVT_INVALID_ADDR) {
472 gvt_err("fail to translate gfn: 0x%lx\n", gfn);
473 return -ENXIO;
474 }
475
476 ops->set_pfn(m, mfn);
477 return 0;
478}
479
480/*
481 * MM helpers.
482 */
483struct intel_gvt_gtt_entry *intel_vgpu_mm_get_entry(struct intel_vgpu_mm *mm,
484 void *page_table, struct intel_gvt_gtt_entry *e,
485 unsigned long index)
486{
487 struct intel_gvt *gvt = mm->vgpu->gvt;
488 struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
489
490 e->type = mm->page_table_entry_type;
491
492 ops->get_entry(page_table, e, index, false, 0, mm->vgpu);
493 ops->test_pse(e);
494 return e;
495}
496
497struct intel_gvt_gtt_entry *intel_vgpu_mm_set_entry(struct intel_vgpu_mm *mm,
498 void *page_table, struct intel_gvt_gtt_entry *e,
499 unsigned long index)
500{
501 struct intel_gvt *gvt = mm->vgpu->gvt;
502 struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
503
504 return ops->set_entry(page_table, e, index, false, 0, mm->vgpu);
505}
506
507/*
508 * PPGTT shadow page table helpers.
509 */
510static inline struct intel_gvt_gtt_entry *ppgtt_spt_get_entry(
511 struct intel_vgpu_ppgtt_spt *spt,
512 void *page_table, int type,
513 struct intel_gvt_gtt_entry *e, unsigned long index,
514 bool guest)
515{
516 struct intel_gvt *gvt = spt->vgpu->gvt;
517 struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
518
519 e->type = get_entry_type(type);
520
521 if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
522 return e;
523
524 ops->get_entry(page_table, e, index, guest,
525 spt->guest_page.gfn << GTT_PAGE_SHIFT,
526 spt->vgpu);
527 ops->test_pse(e);
528 return e;
529}
530
531static inline struct intel_gvt_gtt_entry *ppgtt_spt_set_entry(
532 struct intel_vgpu_ppgtt_spt *spt,
533 void *page_table, int type,
534 struct intel_gvt_gtt_entry *e, unsigned long index,
535 bool guest)
536{
537 struct intel_gvt *gvt = spt->vgpu->gvt;
538 struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
539
540 if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
541 return e;
542
543 return ops->set_entry(page_table, e, index, guest,
544 spt->guest_page.gfn << GTT_PAGE_SHIFT,
545 spt->vgpu);
546}
547
548#define ppgtt_get_guest_entry(spt, e, index) \
549 ppgtt_spt_get_entry(spt, NULL, \
550 spt->guest_page_type, e, index, true)
551
552#define ppgtt_set_guest_entry(spt, e, index) \
553 ppgtt_spt_set_entry(spt, NULL, \
554 spt->guest_page_type, e, index, true)
555
556#define ppgtt_get_shadow_entry(spt, e, index) \
557 ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
558 spt->shadow_page.type, e, index, false)
559
560#define ppgtt_set_shadow_entry(spt, e, index) \
561 ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
562 spt->shadow_page.type, e, index, false)
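
/*
 * The accessors above operate either on the guest page table (reached
 * by GPA through the hypervisor) or on the shadow page table (reached
 * through the kernel mapping of the shadow page), so callers can mirror
 * guest updates into the shadow without open-coding both paths.
 */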
563
564/**
565 * intel_vgpu_init_guest_page - init a guest page data structure
566 * @vgpu: a vGPU
567 * @p: a guest page data structure
568 * @gfn: guest memory page frame number
 * @handler: callback invoked when the tracked guest memory page is modified
 * @data: private data passed back to @handler
571 *
572 * This function is called when user wants to track a guest memory page.
573 *
574 * Returns:
575 * Zero on success, negative error code if failed.
576 */
577int intel_vgpu_init_guest_page(struct intel_vgpu *vgpu,
578 struct intel_vgpu_guest_page *p,
579 unsigned long gfn,
580 int (*handler)(void *, u64, void *, int),
581 void *data)
582{
583 INIT_HLIST_NODE(&p->node);
584
585 p->writeprotection = false;
586 p->gfn = gfn;
587 p->handler = handler;
588 p->data = data;
589 p->oos_page = NULL;
590 p->write_cnt = 0;
591
592 hash_add(vgpu->gtt.guest_page_hash_table, &p->node, p->gfn);
593 return 0;
594}
595
596static int detach_oos_page(struct intel_vgpu *vgpu,
597 struct intel_vgpu_oos_page *oos_page);
598
599/**
600 * intel_vgpu_clean_guest_page - release the resource owned by guest page data
601 * structure
602 * @vgpu: a vGPU
603 * @p: a tracked guest page
604 *
605 * This function is called when user tries to stop tracking a guest memory
606 * page.
607 */
608void intel_vgpu_clean_guest_page(struct intel_vgpu *vgpu,
609 struct intel_vgpu_guest_page *p)
610{
611 if (!hlist_unhashed(&p->node))
612 hash_del(&p->node);
613
614 if (p->oos_page)
615 detach_oos_page(vgpu, p->oos_page);
616
617 if (p->writeprotection)
618 intel_gvt_hypervisor_unset_wp_page(vgpu, p);
619}
620
621/**
622 * intel_vgpu_find_guest_page - find a guest page data structure by GFN.
623 * @vgpu: a vGPU
624 * @gfn: guest memory page frame number
625 *
626 * This function is called when emulation logic wants to know if a trapped GFN
627 * is a tracked guest page.
628 *
629 * Returns:
630 * Pointer to guest page data structure, NULL if failed.
631 */
632struct intel_vgpu_guest_page *intel_vgpu_find_guest_page(
633 struct intel_vgpu *vgpu, unsigned long gfn)
634{
635 struct intel_vgpu_guest_page *p;
636
637 hash_for_each_possible(vgpu->gtt.guest_page_hash_table,
638 p, node, gfn) {
639 if (p->gfn == gfn)
640 return p;
641 }
642 return NULL;
643}
644
645static inline int init_shadow_page(struct intel_vgpu *vgpu,
646 struct intel_vgpu_shadow_page *p, int type)
647{
648 p->vaddr = page_address(p->page);
649 p->type = type;
650
651 INIT_HLIST_NODE(&p->node);
652
653 p->mfn = intel_gvt_hypervisor_virt_to_mfn(p->vaddr);
654 if (p->mfn == INTEL_GVT_INVALID_ADDR)
655 return -EFAULT;
656
657 hash_add(vgpu->gtt.shadow_page_hash_table, &p->node, p->mfn);
658 return 0;
659}
660
661static inline void clean_shadow_page(struct intel_vgpu_shadow_page *p)
662{
663 if (!hlist_unhashed(&p->node))
664 hash_del(&p->node);
665}
666
667static inline struct intel_vgpu_shadow_page *find_shadow_page(
668 struct intel_vgpu *vgpu, unsigned long mfn)
669{
670 struct intel_vgpu_shadow_page *p;
671
672 hash_for_each_possible(vgpu->gtt.shadow_page_hash_table,
673 p, node, mfn) {
674 if (p->mfn == mfn)
675 return p;
676 }
677 return NULL;
678}
679
680#define guest_page_to_ppgtt_spt(ptr) \
681 container_of(ptr, struct intel_vgpu_ppgtt_spt, guest_page)
682
683#define shadow_page_to_ppgtt_spt(ptr) \
684 container_of(ptr, struct intel_vgpu_ppgtt_spt, shadow_page)
685
686static void *alloc_spt(gfp_t gfp_mask)
687{
688 struct intel_vgpu_ppgtt_spt *spt;
689
690 spt = kzalloc(sizeof(*spt), gfp_mask);
691 if (!spt)
692 return NULL;
693
694 spt->shadow_page.page = alloc_page(gfp_mask);
695 if (!spt->shadow_page.page) {
696 kfree(spt);
697 return NULL;
698 }
699 return spt;
700}
701
702static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
703{
704 __free_page(spt->shadow_page.page);
705 kfree(spt);
706}
707
708static void ppgtt_free_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
709{
710 trace_spt_free(spt->vgpu->id, spt, spt->shadow_page.type);
711
712 clean_shadow_page(&spt->shadow_page);
713 intel_vgpu_clean_guest_page(spt->vgpu, &spt->guest_page);
714 list_del_init(&spt->post_shadow_list);
715
716 free_spt(spt);
717}
718
719static void ppgtt_free_all_shadow_page(struct intel_vgpu *vgpu)
720{
721 struct hlist_node *n;
722 struct intel_vgpu_shadow_page *sp;
723 int i;
724
725 hash_for_each_safe(vgpu->gtt.shadow_page_hash_table, i, n, sp, node)
726 ppgtt_free_shadow_page(shadow_page_to_ppgtt_spt(sp));
727}
728
729static int ppgtt_handle_guest_write_page_table_bytes(void *gp,
730 u64 pa, void *p_data, int bytes);
731
732static int ppgtt_write_protection_handler(void *gp, u64 pa,
733 void *p_data, int bytes)
734{
735 struct intel_vgpu_guest_page *gpt = (struct intel_vgpu_guest_page *)gp;
736 int ret;
737
738 if (bytes != 4 && bytes != 8)
739 return -EINVAL;
740
741 if (!gpt->writeprotection)
742 return -EINVAL;
743
	ret = ppgtt_handle_guest_write_page_table_bytes(gp,
			pa, p_data, bytes);
	return ret;
}
750
751static int reclaim_one_mm(struct intel_gvt *gvt);
752
753static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_shadow_page(
754 struct intel_vgpu *vgpu, int type, unsigned long gfn)
755{
756 struct intel_vgpu_ppgtt_spt *spt = NULL;
757 int ret;
758
759retry:
760 spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
761 if (!spt) {
762 if (reclaim_one_mm(vgpu->gvt))
763 goto retry;
764
765 gvt_err("fail to allocate ppgtt shadow page\n");
766 return ERR_PTR(-ENOMEM);
767 }
768
769 spt->vgpu = vgpu;
770 spt->guest_page_type = type;
771 atomic_set(&spt->refcount, 1);
772 INIT_LIST_HEAD(&spt->post_shadow_list);
773
	/*
	 * TODO: the guest page type may differ from the shadow page type
	 * once PSE pages are supported.
	 */
778 ret = init_shadow_page(vgpu, &spt->shadow_page, type);
779 if (ret) {
780 gvt_err("fail to initialize shadow page for spt\n");
781 goto err;
782 }
783
784 ret = intel_vgpu_init_guest_page(vgpu, &spt->guest_page,
785 gfn, ppgtt_write_protection_handler, NULL);
786 if (ret) {
787 gvt_err("fail to initialize guest page for spt\n");
788 goto err;
789 }
790
791 trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
792 return spt;
793err:
794 ppgtt_free_shadow_page(spt);
795 return ERR_PTR(ret);
796}
797
798static struct intel_vgpu_ppgtt_spt *ppgtt_find_shadow_page(
799 struct intel_vgpu *vgpu, unsigned long mfn)
800{
801 struct intel_vgpu_shadow_page *p = find_shadow_page(vgpu, mfn);
802
803 if (p)
804 return shadow_page_to_ppgtt_spt(p);
805
806 gvt_err("vgpu%d: fail to find ppgtt shadow page: 0x%lx\n",
807 vgpu->id, mfn);
808 return NULL;
809}
810
811#define pt_entry_size_shift(spt) \
812 ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)
813
814#define pt_entries(spt) \
815 (GTT_PAGE_SIZE >> pt_entry_size_shift(spt))
816
817#define for_each_present_guest_entry(spt, e, i) \
818 for (i = 0; i < pt_entries(spt); i++) \
819 if (spt->vgpu->gvt->gtt.pte_ops->test_present( \
820 ppgtt_get_guest_entry(spt, e, i)))
821
822#define for_each_present_shadow_entry(spt, e, i) \
823 for (i = 0; i < pt_entries(spt); i++) \
824 if (spt->vgpu->gvt->gtt.pte_ops->test_present( \
825 ppgtt_get_shadow_entry(spt, e, i)))
826
827static void ppgtt_get_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
828{
829 int v = atomic_read(&spt->refcount);
830
831 trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
832
833 atomic_inc(&spt->refcount);
834}
835
836static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt);
837
838static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu,
839 struct intel_gvt_gtt_entry *e)
840{
841 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
842 struct intel_vgpu_ppgtt_spt *s;
843
844 if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(e->type))))
845 return -EINVAL;
846
847 if (ops->get_pfn(e) == vgpu->gtt.scratch_page_mfn)
848 return 0;
849
850 s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e));
851 if (!s) {
852 gvt_err("vgpu%d: fail to find shadow page: mfn: 0x%lx\n",
853 vgpu->id, ops->get_pfn(e));
854 return -ENXIO;
855 }
856 return ppgtt_invalidate_shadow_page(s);
857}
858
859static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
860{
861 struct intel_gvt_gtt_entry e;
862 unsigned long index;
863 int ret;
864 int v = atomic_read(&spt->refcount);
865
866 trace_spt_change(spt->vgpu->id, "die", spt,
867 spt->guest_page.gfn, spt->shadow_page.type);
868
869 trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
870
871 if (atomic_dec_return(&spt->refcount) > 0)
872 return 0;
873
874 if (gtt_type_is_pte_pt(spt->shadow_page.type))
875 goto release;
876
877 for_each_present_shadow_entry(spt, &e, index) {
878 if (!gtt_type_is_pt(get_next_pt_type(e.type))) {
879 gvt_err("GVT doesn't support pse bit for now\n");
880 return -EINVAL;
881 }
882 ret = ppgtt_invalidate_shadow_page_by_shadow_entry(
883 spt->vgpu, &e);
884 if (ret)
885 goto fail;
886 }
887release:
888 trace_spt_change(spt->vgpu->id, "release", spt,
889 spt->guest_page.gfn, spt->shadow_page.type);
890 ppgtt_free_shadow_page(spt);
891 return 0;
892fail:
893 gvt_err("vgpu%d: fail: shadow page %p shadow entry 0x%llx type %d\n",
894 spt->vgpu->id, spt, e.val64, e.type);
895 return ret;
896}
897
898static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt);
899
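/*
 * Find or build the shadow page table that backs a guest entry: if the
 * guest page is already shadowed, its refcount is bumped; otherwise a
 * new shadow page is allocated, the guest page is write-protected and
 * its entries are shadowed recursively.
 */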
900static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry(
901 struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
902{
903 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
904 struct intel_vgpu_ppgtt_spt *s = NULL;
905 struct intel_vgpu_guest_page *g;
906 int ret;
907
908 if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(we->type)))) {
909 ret = -EINVAL;
910 goto fail;
911 }
912
913 g = intel_vgpu_find_guest_page(vgpu, ops->get_pfn(we));
914 if (g) {
915 s = guest_page_to_ppgtt_spt(g);
916 ppgtt_get_shadow_page(s);
917 } else {
918 int type = get_next_pt_type(we->type);
919
920 s = ppgtt_alloc_shadow_page(vgpu, type, ops->get_pfn(we));
921 if (IS_ERR(s)) {
922 ret = PTR_ERR(s);
923 goto fail;
924 }
925
926 ret = intel_gvt_hypervisor_set_wp_page(vgpu, &s->guest_page);
927 if (ret)
928 goto fail;
929
930 ret = ppgtt_populate_shadow_page(s);
931 if (ret)
932 goto fail;
933
934 trace_spt_change(vgpu->id, "new", s, s->guest_page.gfn,
935 s->shadow_page.type);
936 }
937 return s;
938fail:
939 gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
940 vgpu->id, s, we->val64, we->type);
941 return ERR_PTR(ret);
942}
943
944static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
945 struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
946{
947 struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;
948
949 se->type = ge->type;
950 se->val64 = ge->val64;
951
952 ops->set_pfn(se, s->shadow_page.mfn);
953}
954
955static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
956{
957 struct intel_vgpu *vgpu = spt->vgpu;
958 struct intel_vgpu_ppgtt_spt *s;
959 struct intel_gvt_gtt_entry se, ge;
960 unsigned long i;
961 int ret;
962
963 trace_spt_change(spt->vgpu->id, "born", spt,
964 spt->guest_page.gfn, spt->shadow_page.type);
965
966 if (gtt_type_is_pte_pt(spt->shadow_page.type)) {
967 for_each_present_guest_entry(spt, &ge, i) {
968 ret = gtt_entry_p2m(vgpu, &ge, &se);
969 if (ret)
970 goto fail;
971 ppgtt_set_shadow_entry(spt, &se, i);
972 }
973 return 0;
974 }
975
976 for_each_present_guest_entry(spt, &ge, i) {
977 if (!gtt_type_is_pt(get_next_pt_type(ge.type))) {
978 gvt_err("GVT doesn't support pse bit now\n");
979 ret = -EINVAL;
980 goto fail;
981 }
982
983 s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge);
984 if (IS_ERR(s)) {
985 ret = PTR_ERR(s);
986 goto fail;
987 }
988 ppgtt_get_shadow_entry(spt, &se, i);
989 ppgtt_generate_shadow_entry(&se, s, &ge);
990 ppgtt_set_shadow_entry(spt, &se, i);
991 }
992 return 0;
993fail:
994 gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
995 vgpu->id, spt, ge.val64, ge.type);
996 return ret;
997}
998
999static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt,
1000 struct intel_gvt_gtt_entry *we, unsigned long index)
1001{
1002 struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
1003 struct intel_vgpu_shadow_page *sp = &spt->shadow_page;
1004 struct intel_vgpu *vgpu = spt->vgpu;
1005 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1006 struct intel_gvt_gtt_entry e;
1007 int ret;
1008
1009 trace_gpt_change(spt->vgpu->id, "remove", spt, sp->type,
1010 we->val64, index);
1011
1012 ppgtt_get_shadow_entry(spt, &e, index);
1013 if (!ops->test_present(&e))
1014 return 0;
1015
1016 if (ops->get_pfn(&e) == vgpu->gtt.scratch_page_mfn)
1017 return 0;
1018
1019 if (gtt_type_is_pt(get_next_pt_type(we->type))) {
1020 struct intel_vgpu_guest_page *g =
1021 intel_vgpu_find_guest_page(vgpu, ops->get_pfn(we));
1022 if (!g) {
1023 gvt_err("fail to find guest page\n");
1024 ret = -ENXIO;
1025 goto fail;
1026 }
1027 ret = ppgtt_invalidate_shadow_page(guest_page_to_ppgtt_spt(g));
1028 if (ret)
1029 goto fail;
1030 }
1031 ops->set_pfn(&e, vgpu->gtt.scratch_page_mfn);
1032 ppgtt_set_shadow_entry(spt, &e, index);
1033 return 0;
1034fail:
1035 gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
1036 vgpu->id, spt, we->val64, we->type);
1037 return ret;
1038}
1039
1040static int ppgtt_handle_guest_entry_add(struct intel_vgpu_guest_page *gpt,
1041 struct intel_gvt_gtt_entry *we, unsigned long index)
1042{
1043 struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
1044 struct intel_vgpu_shadow_page *sp = &spt->shadow_page;
1045 struct intel_vgpu *vgpu = spt->vgpu;
1046 struct intel_gvt_gtt_entry m;
1047 struct intel_vgpu_ppgtt_spt *s;
1048 int ret;
1049
1050 trace_gpt_change(spt->vgpu->id, "add", spt, sp->type,
1051 we->val64, index);
1052
1053 if (gtt_type_is_pt(get_next_pt_type(we->type))) {
1054 s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, we);
1055 if (IS_ERR(s)) {
1056 ret = PTR_ERR(s);
1057 goto fail;
1058 }
1059 ppgtt_get_shadow_entry(spt, &m, index);
1060 ppgtt_generate_shadow_entry(&m, s, we);
1061 ppgtt_set_shadow_entry(spt, &m, index);
1062 } else {
1063 ret = gtt_entry_p2m(vgpu, we, &m);
1064 if (ret)
1065 goto fail;
1066 ppgtt_set_shadow_entry(spt, &m, index);
1067 }
1068 return 0;
1069fail:
1070 gvt_err("vgpu%d: fail: spt %p guest entry 0x%llx type %d\n", vgpu->id,
1071 spt, we->val64, we->type);
1072 return ret;
1073}
1074
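/*
 * Re-synchronize an out-of-sync guest PTE page: compare the cached copy
 * in oos_page->mem against the live guest page and refresh only the
 * shadow entries that changed or were marked in post_shadow_bitmap.
 */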
1075static int sync_oos_page(struct intel_vgpu *vgpu,
1076 struct intel_vgpu_oos_page *oos_page)
1077{
1078 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1079 struct intel_gvt *gvt = vgpu->gvt;
1080 struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
1081 struct intel_vgpu_ppgtt_spt *spt =
1082 guest_page_to_ppgtt_spt(oos_page->guest_page);
1083 struct intel_gvt_gtt_entry old, new, m;
1084 int index;
1085 int ret;
1086
1087 trace_oos_change(vgpu->id, "sync", oos_page->id,
1088 oos_page->guest_page, spt->guest_page_type);
1089
1090 old.type = new.type = get_entry_type(spt->guest_page_type);
1091 old.val64 = new.val64 = 0;
1092
1093 for (index = 0; index < (GTT_PAGE_SIZE >> info->gtt_entry_size_shift);
1094 index++) {
1095 ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
1096 ops->get_entry(NULL, &new, index, true,
1097 oos_page->guest_page->gfn << PAGE_SHIFT, vgpu);
1098
1099 if (old.val64 == new.val64
1100 && !test_and_clear_bit(index, spt->post_shadow_bitmap))
1101 continue;
1102
1103 trace_oos_sync(vgpu->id, oos_page->id,
1104 oos_page->guest_page, spt->guest_page_type,
1105 new.val64, index);
1106
1107 ret = gtt_entry_p2m(vgpu, &new, &m);
1108 if (ret)
1109 return ret;
1110
1111 ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
1112 ppgtt_set_shadow_entry(spt, &m, index);
1113 }
1114
1115 oos_page->guest_page->write_cnt = 0;
1116 list_del_init(&spt->post_shadow_list);
1117 return 0;
1118}
1119
1120static int detach_oos_page(struct intel_vgpu *vgpu,
1121 struct intel_vgpu_oos_page *oos_page)
1122{
1123 struct intel_gvt *gvt = vgpu->gvt;
1124 struct intel_vgpu_ppgtt_spt *spt =
1125 guest_page_to_ppgtt_spt(oos_page->guest_page);
1126
1127 trace_oos_change(vgpu->id, "detach", oos_page->id,
1128 oos_page->guest_page, spt->guest_page_type);
1129
1130 oos_page->guest_page->write_cnt = 0;
1131 oos_page->guest_page->oos_page = NULL;
1132 oos_page->guest_page = NULL;
1133
1134 list_del_init(&oos_page->vm_list);
1135 list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);
1136
1137 return 0;
1138}
1139
1140static int attach_oos_page(struct intel_vgpu *vgpu,
1141 struct intel_vgpu_oos_page *oos_page,
1142 struct intel_vgpu_guest_page *gpt)
1143{
1144 struct intel_gvt *gvt = vgpu->gvt;
1145 int ret;
1146
1147 ret = intel_gvt_hypervisor_read_gpa(vgpu, gpt->gfn << GTT_PAGE_SHIFT,
1148 oos_page->mem, GTT_PAGE_SIZE);
1149 if (ret)
1150 return ret;
1151
1152 oos_page->guest_page = gpt;
1153 gpt->oos_page = oos_page;
1154
1155 list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);
1156
1157 trace_oos_change(vgpu->id, "attach", gpt->oos_page->id,
1158 gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type);
1159 return 0;
1160}
1161
1162static int ppgtt_set_guest_page_sync(struct intel_vgpu *vgpu,
1163 struct intel_vgpu_guest_page *gpt)
1164{
1165 int ret;
1166
1167 ret = intel_gvt_hypervisor_set_wp_page(vgpu, gpt);
1168 if (ret)
1169 return ret;
1170
1171 trace_oos_change(vgpu->id, "set page sync", gpt->oos_page->id,
1172 gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type);
1173
1174 list_del_init(&gpt->oos_page->vm_list);
1175 return sync_oos_page(vgpu, gpt->oos_page);
1176}
1177
1178static int ppgtt_allocate_oos_page(struct intel_vgpu *vgpu,
1179 struct intel_vgpu_guest_page *gpt)
1180{
1181 struct intel_gvt *gvt = vgpu->gvt;
1182 struct intel_gvt_gtt *gtt = &gvt->gtt;
1183 struct intel_vgpu_oos_page *oos_page = gpt->oos_page;
1184 int ret;
1185
	WARN(oos_page, "shadow PPGTT page already has an oos page\n");
1187
1188 if (list_empty(&gtt->oos_page_free_list_head)) {
1189 oos_page = container_of(gtt->oos_page_use_list_head.next,
1190 struct intel_vgpu_oos_page, list);
1191 ret = ppgtt_set_guest_page_sync(vgpu, oos_page->guest_page);
1192 if (ret)
1193 return ret;
1194 ret = detach_oos_page(vgpu, oos_page);
1195 if (ret)
1196 return ret;
1197 } else
1198 oos_page = container_of(gtt->oos_page_free_list_head.next,
1199 struct intel_vgpu_oos_page, list);
1200 return attach_oos_page(vgpu, oos_page, gpt);
1201}
1202
1203static int ppgtt_set_guest_page_oos(struct intel_vgpu *vgpu,
1204 struct intel_vgpu_guest_page *gpt)
1205{
1206 struct intel_vgpu_oos_page *oos_page = gpt->oos_page;
1207
	if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n"))
1209 return -EINVAL;
1210
1211 trace_oos_change(vgpu->id, "set page out of sync", gpt->oos_page->id,
1212 gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type);
1213
1214 list_add_tail(&oos_page->vm_list, &vgpu->gtt.oos_page_list_head);
1215 return intel_gvt_hypervisor_unset_wp_page(vgpu, gpt);
1216}
1217
1218/**
 * intel_vgpu_sync_oos_pages - sync all out-of-sync shadow pages for a vGPU
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to the host,
 * to bring all the out-of-sync shadow pages of the vGPU back in sync.
1224 *
1225 * Returns:
1226 * Zero on success, negative error code if failed.
1227 */
1228int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
1229{
1230 struct list_head *pos, *n;
1231 struct intel_vgpu_oos_page *oos_page;
1232 int ret;
1233
1234 if (!enable_out_of_sync)
1235 return 0;
1236
1237 list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
1238 oos_page = container_of(pos,
1239 struct intel_vgpu_oos_page, vm_list);
1240 ret = ppgtt_set_guest_page_sync(vgpu, oos_page->guest_page);
1241 if (ret)
1242 return ret;
1243 }
1244 return 0;
1245}
1246
1247/*
1248 * The heart of PPGTT shadow page table.
1249 */
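/*
 * A guest write replaces one guest entry: tear down the shadow of the
 * old entry if it was present, then shadow the new entry if it is
 * present, so the shadow table always mirrors the guest table.
 */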
1250static int ppgtt_handle_guest_write_page_table(
1251 struct intel_vgpu_guest_page *gpt,
1252 struct intel_gvt_gtt_entry *we, unsigned long index)
1253{
1254 struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
1255 struct intel_vgpu *vgpu = spt->vgpu;
1256 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1257 struct intel_gvt_gtt_entry ge;
1258
1259 int old_present, new_present;
1260 int ret;
1261
1262 ppgtt_get_guest_entry(spt, &ge, index);
1263
1264 old_present = ops->test_present(&ge);
1265 new_present = ops->test_present(we);
1266
1267 ppgtt_set_guest_entry(spt, we, index);
1268
1269 if (old_present) {
1270 ret = ppgtt_handle_guest_entry_removal(gpt, &ge, index);
1271 if (ret)
1272 goto fail;
1273 }
1274 if (new_present) {
1275 ret = ppgtt_handle_guest_entry_add(gpt, we, index);
1276 if (ret)
1277 goto fail;
1278 }
1279 return 0;
1280fail:
1281 gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d.\n",
1282 vgpu->id, spt, we->val64, we->type);
1283 return ret;
1284}
1285
1286static inline bool can_do_out_of_sync(struct intel_vgpu_guest_page *gpt)
1287{
1288 return enable_out_of_sync
1289 && gtt_type_is_pte_pt(
1290 guest_page_to_ppgtt_spt(gpt)->guest_page_type)
1291 && gpt->write_cnt >= 2;
1292}
1293
1294static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
1295 unsigned long index)
1296{
1297 set_bit(index, spt->post_shadow_bitmap);
1298 if (!list_empty(&spt->post_shadow_list))
1299 return;
1300
1301 list_add_tail(&spt->post_shadow_list,
1302 &spt->vgpu->gtt.post_shadow_list_head);
1303}
1304
1305/**
1306 * intel_vgpu_flush_post_shadow - flush the post shadow transactions
1307 * @vgpu: a vGPU
1308 *
1309 * This function is called before submitting a guest workload to host,
1310 * to flush all the post shadows for a vGPU.
1311 *
1312 * Returns:
1313 * Zero on success, negative error code if failed.
1314 */
1315int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
1316{
1317 struct list_head *pos, *n;
1318 struct intel_vgpu_ppgtt_spt *spt;
1319 struct intel_gvt_gtt_entry ge, e;
1320 unsigned long index;
1321 int ret;
1322
1323 list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
1324 spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
1325 post_shadow_list);
1326
1327 for_each_set_bit(index, spt->post_shadow_bitmap,
1328 GTT_ENTRY_NUM_IN_ONE_PAGE) {
1329 ppgtt_get_guest_entry(spt, &ge, index);
1330 e = ge;
1331 e.val64 = 0;
1332 ppgtt_set_guest_entry(spt, &e, index);
1333
1334 ret = ppgtt_handle_guest_write_page_table(
1335 &spt->guest_page, &ge, index);
1336 if (ret)
1337 return ret;
1338 clear_bit(index, spt->post_shadow_bitmap);
1339 }
1340 list_del_init(&spt->post_shadow_list);
1341 }
1342 return 0;
1343}
1344
1345static int ppgtt_handle_guest_write_page_table_bytes(void *gp,
1346 u64 pa, void *p_data, int bytes)
1347{
1348 struct intel_vgpu_guest_page *gpt = (struct intel_vgpu_guest_page *)gp;
1349 struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
1350 struct intel_vgpu *vgpu = spt->vgpu;
1351 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1352 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1353 struct intel_gvt_gtt_entry we;
1354 unsigned long index;
1355 int ret;
1356
1357 index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;
1358
1359 ppgtt_get_guest_entry(spt, &we, index);
1360 memcpy((void *)&we.val64 + (pa & (info->gtt_entry_size - 1)),
1361 p_data, bytes);
1362
1363 ops->test_pse(&we);
1364
1365 if (bytes == info->gtt_entry_size) {
1366 ret = ppgtt_handle_guest_write_page_table(gpt, &we, index);
1367 if (ret)
1368 return ret;
1369 } else {
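		/*
		 * A partial (4-byte) write cannot be shadowed yet: drop the
		 * old mapping now, record the partial value in the guest
		 * entry and defer the real shadowing to the post-shadow
		 * flush once the entry is complete.
		 */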
1370 struct intel_gvt_gtt_entry ge;
1371
1372 ppgtt_get_guest_entry(spt, &ge, index);
1373
1374 if (!test_bit(index, spt->post_shadow_bitmap)) {
1375 ret = ppgtt_handle_guest_entry_removal(gpt,
1376 &ge, index);
1377 if (ret)
1378 return ret;
1379 }
1380
1381 ppgtt_set_post_shadow(spt, index);
1382 ppgtt_set_guest_entry(spt, &we, index);
1383 }
1384
1385 if (!enable_out_of_sync)
1386 return 0;
1387
1388 gpt->write_cnt++;
1389
1390 if (gpt->oos_page)
1391 ops->set_entry(gpt->oos_page->mem, &we, index,
1392 false, 0, vgpu);
1393
1394 if (can_do_out_of_sync(gpt)) {
1395 if (!gpt->oos_page)
1396 ppgtt_allocate_oos_page(vgpu, gpt);
1397
1398 ret = ppgtt_set_guest_page_oos(vgpu, gpt);
1399 if (ret < 0)
1400 return ret;
1401 }
1402 return 0;
1403}
1404
/*
 * mm page table allocation policy for bdw+
 * - for GGTT, only a virtual page table is allocated.
 * - for PPGTT, both a dedicated virtual and a shadow page table are allocated.
 */
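/*
 * Note: a PPGTT mm always reserves four root entries: a 3-level table
 * uses all four PDP pointers, while a 4-level table only uses the first
 * entry as the PML4 pointer.
 */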
1410static int gen8_mm_alloc_page_table(struct intel_vgpu_mm *mm)
1411{
1412 struct intel_vgpu *vgpu = mm->vgpu;
1413 struct intel_gvt *gvt = vgpu->gvt;
1414 const struct intel_gvt_device_info *info = &gvt->device_info;
1415 void *mem;
1416
1417 if (mm->type == INTEL_GVT_MM_PPGTT) {
1418 mm->page_table_entry_cnt = 4;
1419 mm->page_table_entry_size = mm->page_table_entry_cnt *
1420 info->gtt_entry_size;
1421 mem = kzalloc(mm->has_shadow_page_table ?
1422 mm->page_table_entry_size * 2
1423 : mm->page_table_entry_size,
1424 GFP_ATOMIC);
1425 if (!mem)
1426 return -ENOMEM;
1427 mm->virtual_page_table = mem;
1428 if (!mm->has_shadow_page_table)
1429 return 0;
1430 mm->shadow_page_table = mem + mm->page_table_entry_size;
1431 } else if (mm->type == INTEL_GVT_MM_GGTT) {
1432 mm->page_table_entry_cnt =
1433 (gvt_ggtt_gm_sz(gvt) >> GTT_PAGE_SHIFT);
1434 mm->page_table_entry_size = mm->page_table_entry_cnt *
1435 info->gtt_entry_size;
1436 mem = vzalloc(mm->page_table_entry_size);
1437 if (!mem)
1438 return -ENOMEM;
1439 mm->virtual_page_table = mem;
1440 }
1441 return 0;
1442}
1443
1444static void gen8_mm_free_page_table(struct intel_vgpu_mm *mm)
1445{
1446 if (mm->type == INTEL_GVT_MM_PPGTT) {
1447 kfree(mm->virtual_page_table);
1448 } else if (mm->type == INTEL_GVT_MM_GGTT) {
1449 if (mm->virtual_page_table)
1450 vfree(mm->virtual_page_table);
1451 }
1452 mm->virtual_page_table = mm->shadow_page_table = NULL;
1453}
1454
1455static void invalidate_mm(struct intel_vgpu_mm *mm)
1456{
1457 struct intel_vgpu *vgpu = mm->vgpu;
1458 struct intel_gvt *gvt = vgpu->gvt;
1459 struct intel_gvt_gtt *gtt = &gvt->gtt;
1460 struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1461 struct intel_gvt_gtt_entry se;
1462 int i;
1463
1464 if (WARN_ON(!mm->has_shadow_page_table || !mm->shadowed))
1465 return;
1466
1467 for (i = 0; i < mm->page_table_entry_cnt; i++) {
1468 ppgtt_get_shadow_root_entry(mm, &se, i);
1469 if (!ops->test_present(&se))
1470 continue;
1471 ppgtt_invalidate_shadow_page_by_shadow_entry(
1472 vgpu, &se);
1473 se.val64 = 0;
1474 ppgtt_set_shadow_root_entry(mm, &se, i);
1475
1476 trace_gpt_change(vgpu->id, "destroy root pointer",
1477 NULL, se.type, se.val64, i);
1478 }
1479 mm->shadowed = false;
1480}
1481
1482/**
1483 * intel_vgpu_destroy_mm - destroy a mm object
1484 * @mm: a kref object
1485 *
1486 * This function is used to destroy a mm object for vGPU
1487 *
1488 */
1489void intel_vgpu_destroy_mm(struct kref *mm_ref)
1490{
1491 struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
1492 struct intel_vgpu *vgpu = mm->vgpu;
1493 struct intel_gvt *gvt = vgpu->gvt;
1494 struct intel_gvt_gtt *gtt = &gvt->gtt;
1495
1496 if (!mm->initialized)
1497 goto out;
1498
1499 list_del(&mm->list);
1500 list_del(&mm->lru_list);
1501
1502 if (mm->has_shadow_page_table)
1503 invalidate_mm(mm);
1504
1505 gtt->mm_free_page_table(mm);
1506out:
1507 kfree(mm);
1508}
1509
1510static int shadow_mm(struct intel_vgpu_mm *mm)
1511{
1512 struct intel_vgpu *vgpu = mm->vgpu;
1513 struct intel_gvt *gvt = vgpu->gvt;
1514 struct intel_gvt_gtt *gtt = &gvt->gtt;
1515 struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1516 struct intel_vgpu_ppgtt_spt *spt;
1517 struct intel_gvt_gtt_entry ge, se;
1518 int i;
1519 int ret;
1520
1521 if (WARN_ON(!mm->has_shadow_page_table || mm->shadowed))
1522 return 0;
1523
1524 mm->shadowed = true;
1525
1526 for (i = 0; i < mm->page_table_entry_cnt; i++) {
1527 ppgtt_get_guest_root_entry(mm, &ge, i);
1528 if (!ops->test_present(&ge))
1529 continue;
1530
1531 trace_gpt_change(vgpu->id, __func__, NULL,
1532 ge.type, ge.val64, i);
1533
1534 spt = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge);
1535 if (IS_ERR(spt)) {
1536 gvt_err("fail to populate guest root pointer\n");
1537 ret = PTR_ERR(spt);
1538 goto fail;
1539 }
1540 ppgtt_generate_shadow_entry(&se, spt, &ge);
1541 ppgtt_set_shadow_root_entry(mm, &se, i);
1542
1543 trace_gpt_change(vgpu->id, "populate root pointer",
1544 NULL, se.type, se.val64, i);
1545 }
1546 return 0;
1547fail:
1548 invalidate_mm(mm);
1549 return ret;
1550}
1551
1552/**
1553 * intel_vgpu_create_mm - create a mm object for a vGPU
1554 * @vgpu: a vGPU
1555 * @mm_type: mm object type, should be PPGTT or GGTT
1556 * @virtual_page_table: page table root pointers. Could be NULL if user wants
1557 * to populate shadow later.
1558 * @page_table_level: describe the page table level of the mm object
1559 * @pde_base_index: pde root pointer base in GGTT MMIO.
1560 *
1561 * This function is used to create a mm object for a vGPU.
1562 *
1563 * Returns:
 * The created mm object on success, or an ERR_PTR()-encoded error code if
 * failed.
1565 */
1566struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu,
1567 int mm_type, void *virtual_page_table, int page_table_level,
1568 u32 pde_base_index)
1569{
1570 struct intel_gvt *gvt = vgpu->gvt;
1571 struct intel_gvt_gtt *gtt = &gvt->gtt;
1572 struct intel_vgpu_mm *mm;
1573 int ret;
1574
1575 mm = kzalloc(sizeof(*mm), GFP_ATOMIC);
1576 if (!mm) {
1577 ret = -ENOMEM;
1578 goto fail;
1579 }
1580
1581 mm->type = mm_type;
1582
1583 if (page_table_level == 1)
1584 mm->page_table_entry_type = GTT_TYPE_GGTT_PTE;
1585 else if (page_table_level == 3)
1586 mm->page_table_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY;
1587 else if (page_table_level == 4)
1588 mm->page_table_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
1589 else {
1590 WARN_ON(1);
1591 ret = -EINVAL;
1592 goto fail;
1593 }
1594
1595 mm->page_table_level = page_table_level;
1596 mm->pde_base_index = pde_base_index;
1597
1598 mm->vgpu = vgpu;
1599 mm->has_shadow_page_table = !!(mm_type == INTEL_GVT_MM_PPGTT);
1600
1601 kref_init(&mm->ref);
1602 atomic_set(&mm->pincount, 0);
1603 INIT_LIST_HEAD(&mm->list);
1604 INIT_LIST_HEAD(&mm->lru_list);
1605 list_add_tail(&mm->list, &vgpu->gtt.mm_list_head);
1606
1607 ret = gtt->mm_alloc_page_table(mm);
1608 if (ret) {
1609 gvt_err("fail to allocate page table for mm\n");
1610 goto fail;
1611 }
1612
1613 mm->initialized = true;
1614
1615 if (virtual_page_table)
1616 memcpy(mm->virtual_page_table, virtual_page_table,
1617 mm->page_table_entry_size);
1618
1619 if (mm->has_shadow_page_table) {
1620 ret = shadow_mm(mm);
1621 if (ret)
1622 goto fail;
1623 list_add_tail(&mm->lru_list, &gvt->gtt.mm_lru_list_head);
1624 }
1625 return mm;
1626fail:
1627 gvt_err("fail to create mm\n");
1628 if (mm)
1629 intel_gvt_mm_unreference(mm);
1630 return ERR_PTR(ret);
1631}
1632
1633/**
1634 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
1635 * @mm: a vGPU mm object
1636 *
 * This function is called when a vGPU mm object is no longer needed.
1638 */
1639void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
1640{
1641 if (WARN_ON(mm->type != INTEL_GVT_MM_PPGTT))
1642 return;
1643
1644 atomic_dec(&mm->pincount);
1645}
1646
1647/**
1648 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
 * @mm: a vGPU mm object
1650 *
1651 * This function is called when user wants to use a vGPU mm object. If this
1652 * mm object hasn't been shadowed yet, the shadow will be populated at this
1653 * time.
1654 *
1655 * Returns:
1656 * Zero on success, negative error code if failed.
1657 */
1658int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
1659{
1660 int ret;
1661
1662 if (WARN_ON(mm->type != INTEL_GVT_MM_PPGTT))
1663 return 0;
1664
1665 atomic_inc(&mm->pincount);
1666
1667 if (!mm->shadowed) {
1668 ret = shadow_mm(mm);
1669 if (ret)
1670 return ret;
1671 }
1672
1673 list_del_init(&mm->lru_list);
1674 list_add_tail(&mm->lru_list, &mm->vgpu->gvt->gtt.mm_lru_list_head);
1675 return 0;
1676}
1677
1678static int reclaim_one_mm(struct intel_gvt *gvt)
1679{
1680 struct intel_vgpu_mm *mm;
1681 struct list_head *pos, *n;
1682
1683 list_for_each_safe(pos, n, &gvt->gtt.mm_lru_list_head) {
1684 mm = container_of(pos, struct intel_vgpu_mm, lru_list);
1685
1686 if (mm->type != INTEL_GVT_MM_PPGTT)
1687 continue;
1688 if (atomic_read(&mm->pincount))
1689 continue;
1690
1691 list_del_init(&mm->lru_list);
1692 invalidate_mm(mm);
1693 return 1;
1694 }
1695 return 0;
1696}
1697
1698/*
1699 * GMA translation APIs.
1700 */
1701static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
1702 struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
1703{
1704 struct intel_vgpu *vgpu = mm->vgpu;
1705 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1706 struct intel_vgpu_ppgtt_spt *s;
1707
1708 if (WARN_ON(!mm->has_shadow_page_table))
1709 return -EINVAL;
1710
1711 s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e));
1712 if (!s)
1713 return -ENXIO;
1714
1715 if (!guest)
1716 ppgtt_get_shadow_entry(s, e, index);
1717 else
1718 ppgtt_get_guest_entry(s, e, index);
1719 return 0;
1720}
1721
1722/**
1723 * intel_vgpu_gma_to_gpa - translate a gma to GPA
1724 * @mm: mm object. could be a PPGTT or GGTT mm object
1725 * @gma: graphics memory address in this mm object
1726 *
1727 * This function is used to translate a graphics memory address in specific
1728 * graphics memory space to guest physical address.
1729 *
1730 * Returns:
1731 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
1732 */
1733unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
1734{
1735 struct intel_vgpu *vgpu = mm->vgpu;
1736 struct intel_gvt *gvt = vgpu->gvt;
1737 struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
1738 struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
1739 unsigned long gpa = INTEL_GVT_INVALID_ADDR;
1740 unsigned long gma_index[4];
1741 struct intel_gvt_gtt_entry e;
1742 int i, index;
1743 int ret;
1744
1745 if (mm->type != INTEL_GVT_MM_GGTT && mm->type != INTEL_GVT_MM_PPGTT)
1746 return INTEL_GVT_INVALID_ADDR;
1747
1748 if (mm->type == INTEL_GVT_MM_GGTT) {
1749 if (!vgpu_gmadr_is_valid(vgpu, gma))
1750 goto err;
1751
1752 ggtt_get_guest_entry(mm, &e,
1753 gma_ops->gma_to_ggtt_pte_index(gma));
1754 gpa = (pte_ops->get_pfn(&e) << GTT_PAGE_SHIFT)
1755 + (gma & ~GTT_PAGE_MASK);
1756
1757 trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
1758 return gpa;
1759 }
1760
1761 switch (mm->page_table_level) {
1762 case 4:
1763 ppgtt_get_shadow_root_entry(mm, &e, 0);
1764 gma_index[0] = gma_ops->gma_to_pml4_index(gma);
1765 gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
1766 gma_index[2] = gma_ops->gma_to_pde_index(gma);
1767 gma_index[3] = gma_ops->gma_to_pte_index(gma);
1768 index = 4;
1769 break;
1770 case 3:
1771 ppgtt_get_shadow_root_entry(mm, &e,
1772 gma_ops->gma_to_l3_pdp_index(gma));
1773 gma_index[0] = gma_ops->gma_to_pde_index(gma);
1774 gma_index[1] = gma_ops->gma_to_pte_index(gma);
1775 index = 2;
1776 break;
1777 case 2:
1778 ppgtt_get_shadow_root_entry(mm, &e,
1779 gma_ops->gma_to_pde_index(gma));
1780 gma_index[0] = gma_ops->gma_to_pte_index(gma);
1781 index = 1;
1782 break;
1783 default:
1784 WARN_ON(1);
1785 goto err;
1786 }
1787
1788 /* walk into the shadow page table and get gpa from guest entry */
1789 for (i = 0; i < index; i++) {
1790 ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
1791 (i == index - 1));
1792 if (ret)
1793 goto err;
1794 }
1795
1796 gpa = (pte_ops->get_pfn(&e) << GTT_PAGE_SHIFT)
1797 + (gma & ~GTT_PAGE_MASK);
1798
1799 trace_gma_translate(vgpu->id, "ppgtt", 0,
1800 mm->page_table_level, gma, gpa);
1801 return gpa;
1802err:
1803 gvt_err("invalid mm type: %d gma %lx\n", mm->type, gma);
1804 return INTEL_GVT_INVALID_ADDR;
1805}
1806
1807static int emulate_gtt_mmio_read(struct intel_vgpu *vgpu,
1808 unsigned int off, void *p_data, unsigned int bytes)
1809{
1810 struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
1811 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1812 unsigned long index = off >> info->gtt_entry_size_shift;
1813 struct intel_gvt_gtt_entry e;
1814
1815 if (bytes != 4 && bytes != 8)
1816 return -EINVAL;
1817
1818 ggtt_get_guest_entry(ggtt_mm, &e, index);
1819 memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
1820 bytes);
1821 return 0;
1822}
1823
1824/**
1825 * intel_vgpu_emulate_gtt_mmio_read - emulate GTT MMIO register read
1826 * @vgpu: a vGPU
1827 * @off: register offset
1828 * @p_data: data will be returned to guest
1829 * @bytes: data length
1830 *
1831 * This function is used to emulate the GTT MMIO register read
1832 *
1833 * Returns:
1834 * Zero on success, error code if failed.
1835 */
1836int intel_vgpu_emulate_gtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
1837 void *p_data, unsigned int bytes)
1838{
1839 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1840 int ret;
1841
1842 if (bytes != 4 && bytes != 8)
1843 return -EINVAL;
1844
1845 off -= info->gtt_start_offset;
1846 ret = emulate_gtt_mmio_read(vgpu, off, p_data, bytes);
1847 return ret;
1848}
1849
1850static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
1851 void *p_data, unsigned int bytes)
1852{
1853 struct intel_gvt *gvt = vgpu->gvt;
1854 const struct intel_gvt_device_info *info = &gvt->device_info;
1855 struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
1856 struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
1857 unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
1858 unsigned long gma;
1859 struct intel_gvt_gtt_entry e, m;
1860 int ret;
1861
1862 if (bytes != 4 && bytes != 8)
1863 return -EINVAL;
1864
1865 gma = g_gtt_index << GTT_PAGE_SHIFT;
1866
1867 /* the VM may configure the whole GM space when ballooning is used */
1868 if (WARN_ONCE(!vgpu_gmadr_is_valid(vgpu, gma),
1869 "vgpu%d: found oob ggtt write, offset %x\n",
1870 vgpu->id, off)) {
1871 return 0;
1872 }
1873
1874 ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index);
1875
1876 memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
1877 bytes);
1878
1879 if (ops->test_present(&e)) {
1880 ret = gtt_entry_p2m(vgpu, &e, &m);
1881 if (ret) {
1882 gvt_err("vgpu%d: fail to translate guest gtt entry\n",
1883 vgpu->id);
1884 return ret;
1885 }
1886 } else {
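		/* Guest entry is not present: clear the shadow entry as well. */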
1887 m = e;
1888 m.val64 = 0;
1889 }
1890
1891 ggtt_set_shadow_entry(ggtt_mm, &m, g_gtt_index);
1892 ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
1893 return 0;
1894}
1895
/**
1897 * intel_vgpu_emulate_gtt_mmio_write - emulate GTT MMIO register write
1898 * @vgpu: a vGPU
1899 * @off: register offset
1900 * @p_data: data from guest write
1901 * @bytes: data length
1902 *
1903 * This function is used to emulate the GTT MMIO register write
1904 *
1905 * Returns:
1906 * Zero on success, error code if failed.
1907 */
1908int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
1909 void *p_data, unsigned int bytes)
1910{
1911 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1912 int ret;
1913
1914 if (bytes != 4 && bytes != 8)
1915 return -EINVAL;
1916
1917 off -= info->gtt_start_offset;
1918 ret = emulate_gtt_mmio_write(vgpu, off, p_data, bytes);
1919 return ret;
1920}
1921
int intel_gvt_create_scratch_page(struct intel_vgpu *vgpu)
1923{
1924 struct intel_vgpu_gtt *gtt = &vgpu->gtt;
1925 void *p;
1926 void *vaddr;
1927 unsigned long mfn;
1928
1929 gtt->scratch_page = alloc_page(GFP_KERNEL);
1930 if (!gtt->scratch_page) {
1931 gvt_err("Failed to allocate scratch page.\n");
1932 return -ENOMEM;
1933 }
1934
1935 /* set to zero */
1936 p = kmap_atomic(gtt->scratch_page);
1937 memset(p, 0, PAGE_SIZE);
1938 kunmap_atomic(p);
1939
1940 /* translate page to mfn */
1941 vaddr = page_address(gtt->scratch_page);
1942 mfn = intel_gvt_hypervisor_virt_to_mfn(vaddr);
1943
1944 if (mfn == INTEL_GVT_INVALID_ADDR) {
1945 gvt_err("fail to translate vaddr:0x%llx\n", (u64)vaddr);
1946 __free_page(gtt->scratch_page);
1947 gtt->scratch_page = NULL;
1948 return -ENXIO;
1949 }
1950
1951 gtt->scratch_page_mfn = mfn;
1952 gvt_dbg_core("vgpu%d create scratch page: mfn=0x%lx\n", vgpu->id, mfn);
1953 return 0;
1954}
1955
1956void intel_gvt_release_scratch_page(struct intel_vgpu *vgpu)
1957{
1958 if (vgpu->gtt.scratch_page != NULL) {
1959 __free_page(vgpu->gtt.scratch_page);
1960 vgpu->gtt.scratch_page = NULL;
1961 vgpu->gtt.scratch_page_mfn = 0;
1962 }
1963}
1964
1965/**
1966 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
1967 * @vgpu: a vGPU
1968 *
1969 * This function is used to initialize per-vGPU graphics memory virtualization
1970 * components.
1971 *
1972 * Returns:
1973 * Zero on success, error code if failed.
1974 */
1975int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
1976{
1977 struct intel_vgpu_gtt *gtt = &vgpu->gtt;
1978 struct intel_vgpu_mm *ggtt_mm;
1979
1980 hash_init(gtt->guest_page_hash_table);
1981 hash_init(gtt->shadow_page_hash_table);
1982
1983 INIT_LIST_HEAD(&gtt->mm_list_head);
1984 INIT_LIST_HEAD(&gtt->oos_page_list_head);
1985 INIT_LIST_HEAD(&gtt->post_shadow_list_head);
1986
1987 ggtt_mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_GGTT,
1988 NULL, 1, 0);
1989 if (IS_ERR(ggtt_mm)) {
1990 gvt_err("fail to create mm for ggtt.\n");
1991 return PTR_ERR(ggtt_mm);
1992 }
1993
1994 gtt->ggtt_mm = ggtt_mm;
1995
1996 /* propagate scratch page setup failure to the caller */
1997 return intel_gvt_create_scratch_page(vgpu);
1998}
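
/*
 * Illustrative sketch, not part of this file: the vGPU creation path is
 * expected to pair intel_vgpu_init_gtt() with intel_vgpu_clean_gtt() on
 * teardown.  The setup_vgpu() wrapper and later_init_step() below are
 * hypothetical.
 *
 *	static int setup_vgpu(struct intel_vgpu *vgpu)
 *	{
 *		int ret;
 *
 *		ret = intel_vgpu_init_gtt(vgpu);
 *		if (ret)
 *			return ret;
 *
 *		ret = later_init_step(vgpu);
 *		if (ret)
 *			intel_vgpu_clean_gtt(vgpu);
 *		return ret;
 *	}
 */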
1999
2000/**
2001 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
2002 * @vgpu: a vGPU
2003 *
2004 * This function is used to clean up per-vGPU graphics memory virtualization
2005 * components.
2009 */
2010void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
2011{
2012 struct list_head *pos, *n;
2013 struct intel_vgpu_mm *mm;
2014
2015 ppgtt_free_all_shadow_page(vgpu);
2016 intel_gvt_release_scratch_page(vgpu);
2017
2018 list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) {
2019 mm = container_of(pos, struct intel_vgpu_mm, list);
2020 vgpu->gvt->gtt.mm_free_page_table(mm);
2021 list_del(&mm->list);
2022 list_del(&mm->lru_list);
2023 kfree(mm);
2024 }
2025}
2026
2027static void clean_spt_oos(struct intel_gvt *gvt)
2028{
2029 struct intel_gvt_gtt *gtt = &gvt->gtt;
2030 struct list_head *pos, *n;
2031 struct intel_vgpu_oos_page *oos_page;
2032
2033 WARN(!list_empty(&gtt->oos_page_use_list_head),
2034 "someone is still using oos page\n");
2035
2036 list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
2037 oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
2038 list_del(&oos_page->list);
2039 kfree(oos_page);
2040 }
2041}
2042
2043static int setup_spt_oos(struct intel_gvt *gvt)
2044{
2045 struct intel_gvt_gtt *gtt = &gvt->gtt;
2046 struct intel_vgpu_oos_page *oos_page;
2047 int i;
2048 int ret;
2049
2050 INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
2051 INIT_LIST_HEAD(&gtt->oos_page_use_list_head);
2052
2053 for (i = 0; i < preallocated_oos_pages; i++) {
2054 oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
2055 if (!oos_page) {
2056 gvt_err("fail to pre-allocate oos page\n");
2057 ret = -ENOMEM;
2058 goto fail;
2059 }
2060
2061 INIT_LIST_HEAD(&oos_page->list);
2062 INIT_LIST_HEAD(&oos_page->vm_list);
2063 oos_page->id = i;
2064 list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
2065 }
2066
2067 gvt_dbg_mm("%d oos pages preallocated\n", i);
2068
2069 return 0;
2070fail:
2071 clean_spt_oos(gvt);
2072 return ret;
2073}
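
/*
 * Illustrative sketch, simplified from how a consumer of the pool set up
 * above might behave (the driver's own allocator is not shown here): a
 * shadow page that goes out of sync takes an oos page off the free list and
 * parks it on the use list until it is written back.
 *
 *	oos_page = list_first_entry_or_null(&gtt->oos_page_free_list_head,
 *					    struct intel_vgpu_oos_page, list);
 *	if (oos_page)
 *		list_move_tail(&oos_page->list,
 *			       &gtt->oos_page_use_list_head);
 */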
2074
2075/**
2076 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
2077 * @vgpu: a vGPU
2078 * @page_table_level: PPGTT page table level
2079 * @root_entry: PPGTT page table root pointers
2080 *
2081 * This function is used to find a PPGTT mm object in the mm object pool of a vGPU.
2082 *
2083 * Returns:
2084 * pointer to mm object on success, NULL if failed.
2085 */
2086struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
2087 int page_table_level, void *root_entry)
2088{
2089 struct list_head *pos;
2090 struct intel_vgpu_mm *mm;
2091 u64 *src, *dst;
2092
2093 list_for_each(pos, &vgpu->gtt.mm_list_head) {
2094 mm = container_of(pos, struct intel_vgpu_mm, list);
2095 if (mm->type != INTEL_GVT_MM_PPGTT)
2096 continue;
2097
2098 if (mm->page_table_level != page_table_level)
2099 continue;
2100
2101 src = root_entry;
2102 dst = mm->virtual_page_table;
2103
2104 if (page_table_level == 3) {
2105 if (src[0] == dst[0]
2106 && src[1] == dst[1]
2107 && src[2] == dst[2]
2108 && src[3] == dst[3])
2109 return mm;
2110 } else {
2111 if (src[0] == dst[0])
2112 return mm;
2113 }
2114 }
2115 return NULL;
2116}
2117
2118/**
2119 * intel_vgpu_g2v_create_ppgtt_mm - create a PPGTT mm object from
2120 * g2v notification
2121 * @vgpu: a vGPU
2122 * @page_table_level: PPGTT page table level
2123 *
2124 * This function is used to create a PPGTT mm object on a guest-to-GVT-g
2125 * (g2v) notification.
2126 *
2127 * Returns:
2128 * Zero on success, negative error code if failed.
2129 */
2130int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu,
2131 int page_table_level)
2132{
2133 u64 *pdp = (u64 *)&vgpu_vreg64(vgpu, vgtif_reg(pdp[0]));
2134 struct intel_vgpu_mm *mm;
2135
2136 if (WARN_ON((page_table_level != 4) && (page_table_level != 3)))
2137 return -EINVAL;
2138
2139 mm = intel_vgpu_find_ppgtt_mm(vgpu, page_table_level, pdp);
2140 if (mm) {
2141 intel_gvt_mm_reference(mm);
2142 } else {
2143 mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_PPGTT,
2144 pdp, page_table_level, 0);
2145 if (IS_ERR(mm)) {
2146 gvt_err("fail to create mm\n");
2147 return PTR_ERR(mm);
2148 }
2149 }
2150 return 0;
2151}
2152
2153/**
2154 * intel_vgpu_g2v_destroy_ppgtt_mm - destroy a PPGTT mm object from
2155 * g2v notification
2156 * @vgpu: a vGPU
2157 * @page_table_level: PPGTT page table level
2158 *
2159 * This function is used to destroy a PPGTT mm object on a guest-to-GVT-g
2160 * (g2v) notification.
2161 *
2162 * Returns:
2163 * Zero on success, negative error code if failed.
2164 */
2165int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu,
2166 int page_table_level)
2167{
2168 u64 *pdp = (u64 *)&vgpu_vreg64(vgpu, vgtif_reg(pdp[0]));
2169 struct intel_vgpu_mm *mm;
2170
2171 if (WARN_ON((page_table_level != 4) && (page_table_level != 3)))
2172 return -EINVAL;
2173
2174 mm = intel_vgpu_find_ppgtt_mm(vgpu, page_table_level, pdp);
2175 if (!mm) {
2176 gvt_err("fail to find ppgtt instance.\n");
2177 return -EINVAL;
2178 }
2179 intel_gvt_mm_unreference(mm);
2180 return 0;
2181}
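
/*
 * Illustrative sketch: a g2v notification handler can map the value written
 * by the guest onto the create/destroy helpers above, choosing the page
 * table level from the notification type.  The VGT_G2V_* constant names are
 * assumptions made for this example.
 *
 *	switch (notification) {
 *	case VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE:
 *		return intel_vgpu_g2v_create_ppgtt_mm(vgpu, 3);
 *	case VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE:
 *		return intel_vgpu_g2v_create_ppgtt_mm(vgpu, 4);
 *	case VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY:
 *		return intel_vgpu_g2v_destroy_ppgtt_mm(vgpu, 3);
 *	case VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY:
 *		return intel_vgpu_g2v_destroy_ppgtt_mm(vgpu, 4);
 *	}
 */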
2182
2183/**
2184 * intel_gvt_init_gtt - initialize mm components of a GVT device
2185 * @gvt: GVT device
2186 *
2187 * This function is called at the initialization stage, to initialize
2188 * the mm components of a GVT device.
2189 *
2190 * Returns:
2191 * zero on success, negative error code if failed.
2192 */
2193int intel_gvt_init_gtt(struct intel_gvt *gvt)
2194{
2195 int ret;
2196
2197 gvt_dbg_core("init gtt\n");
2198
2199 if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
2200 gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
2201 gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
2202 gvt->gtt.mm_alloc_page_table = gen8_mm_alloc_page_table;
2203 gvt->gtt.mm_free_page_table = gen8_mm_free_page_table;
2204 } else {
2205 return -ENODEV;
2206 }
2207
2208 if (enable_out_of_sync) {
2209 ret = setup_spt_oos(gvt);
2210 if (ret) {
2211 gvt_err("fail to initialize SPT oos\n");
2212 return ret;
2213 }
2214 }
2215 INIT_LIST_HEAD(&gvt->gtt.mm_lru_list_head);
2216 return 0;
2217}
2218
2219/**
2220 * intel_gvt_clean_gtt - clean up mm components of a GVT device
2221 * @gvt: GVT device
2222 *
2223 * This function is called at the driver unloading stage, to clean up the
2224 * mm components of a GVT device.
2225 *
2226 */
2227void intel_gvt_clean_gtt(struct intel_gvt *gvt)
2228{
2229 if (enable_out_of_sync)
2230 clean_spt_oos(gvt);
2231}
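
/*
 * Illustrative sketch, not part of this file: at device level the two entry
 * points above are expected to bracket the GVT device lifetime, with
 * intel_gvt_init_gtt() called at load time and intel_gvt_clean_gtt() at
 * unload time.  The surrounding load/unload code is only hinted at here.
 *
 *	ret = intel_gvt_init_gtt(gvt);
 *	if (ret)
 *		goto out_clean_earlier_components;
 *	...
 *	intel_gvt_clean_gtt(gvt);
 */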