/*
 * GTT virtualization
 *
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhi Wang <zhi.a.wang@intel.com>
 *    Zhenyu Wang <zhenyuw@linux.intel.com>
 *    Xiao Zheng <xiao.zheng@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"
#include "i915_pvinfo.h"
#include "trace.h"

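/*
 * Tunables for the out-of-sync (OOS) shadow page optimization used
 * later in this file: enable_out_of_sync gates the whole mechanism,
 * and preallocated_oos_pages bounds the pool of backing pages. As
 * written here they are compile-time constants, not module parameters.
 */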
static bool enable_out_of_sync = false;
static int preallocated_oos_pages = 8192;

/*
 * validate a gm address and related range size,
 * translate it to host gm address
 */
bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
{
	if ((!vgpu_gmadr_is_valid(vgpu, addr)) || (size
			&& !vgpu_gmadr_is_valid(vgpu, addr + size - 1))) {
		gvt_vgpu_err("invalid range gmadr 0x%llx size 0x%x\n",
				addr, size);
		return false;
	}
	return true;
}

/* translate a guest gmadr to host gmadr */
int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr)
{
	if (WARN(!vgpu_gmadr_is_valid(vgpu, g_addr),
		 "invalid guest gmadr %llx\n", g_addr))
		return -EACCES;

	if (vgpu_gmadr_is_aperture(vgpu, g_addr))
		*h_addr = vgpu_aperture_gmadr_base(vgpu)
			  + (g_addr - vgpu_aperture_offset(vgpu));
	else
		*h_addr = vgpu_hidden_gmadr_base(vgpu)
			  + (g_addr - vgpu_hidden_offset(vgpu));
	return 0;
}
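
/*
 * Illustrative example (hypothetical layout): if this vGPU owns the
 * host aperture range starting at host gmadr 0x10000000 and exposes
 * it to the guest at offset 0, a guest gmadr of 0x1000 inside the
 * aperture translates to host gmadr 0x10000000 + (0x1000 - 0)
 * = 0x10001000. The hidden (high) range is translated the same way
 * using the hidden base and offset.
 */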

/* translate a host gmadr to guest gmadr */
int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr)
{
	if (WARN(!gvt_gmadr_is_valid(vgpu->gvt, h_addr),
		 "invalid host gmadr %llx\n", h_addr))
		return -EACCES;

	if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr))
		*g_addr = vgpu_aperture_gmadr_base(vgpu)
			  + (h_addr - gvt_aperture_gmadr_base(vgpu->gvt));
	else
		*g_addr = vgpu_hidden_gmadr_base(vgpu)
			  + (h_addr - gvt_hidden_gmadr_base(vgpu->gvt));
	return 0;
}

int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
			     unsigned long *h_index)
{
	u64 h_addr;
	int ret;

	ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << I915_GTT_PAGE_SHIFT,
				       &h_addr);
	if (ret)
		return ret;

	*h_index = h_addr >> I915_GTT_PAGE_SHIFT;
	return 0;
}

int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
			     unsigned long *g_index)
{
	u64 g_addr;
	int ret;

	ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << I915_GTT_PAGE_SHIFT,
				       &g_addr);
	if (ret)
		return ret;

	*g_index = g_addr >> I915_GTT_PAGE_SHIFT;
	return 0;
}
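
/*
 * GGTT indices and graphics memory addresses differ only by the 4KB
 * page shift (I915_GTT_PAGE_SHIFT == 12). For example, GGTT entry
 * index 0x100 corresponds to gmadr 0x100000, so the two helpers above
 * simply reuse the gmadr translation on the shifted value.
 */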

#define gtt_type_is_entry(type) \
	(type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
	 && type != GTT_TYPE_PPGTT_PTE_ENTRY \
	 && type != GTT_TYPE_PPGTT_ROOT_ENTRY)

#define gtt_type_is_pt(type) \
	(type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)

#define gtt_type_is_pte_pt(type) \
	(type == GTT_TYPE_PPGTT_PTE_PT)

#define gtt_type_is_root_pointer(type) \
	(gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)

#define gtt_init_entry(e, t, p, v) do { \
	(e)->type = t; \
	(e)->pdev = p; \
	memcpy(&(e)->val64, &v, sizeof(v)); \
} while (0)

/*
 * Mappings between GTT_TYPE* enumerations.
 *
 * Given a type, the table below answers three questions:
 * - what is the type of the next-level page table?
 * - what is the type of an entry inside this level of page table?
 * - what is the entry type when the PSE bit is set?
 *
 * When a question does not apply, GTT_TYPE_INVALID is returned. For
 * example, an L4 root entry has no PSE bit, so asking for its PSE type
 * yields GTT_TYPE_INVALID; likewise a PTE page table is the last level,
 * so asking for its next-level page table type also yields
 * GTT_TYPE_INVALID. This is useful when traversing a page table.
 */

struct gtt_type_table_entry {
	int entry_type;
	int pt_type;
	int next_pt_type;
	int pse_entry_type;
};

#define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \
	[type] = { \
		.entry_type = e_type, \
		.pt_type = cpt_type, \
		.next_pt_type = npt_type, \
		.pse_entry_type = pse_type, \
	}

static struct gtt_type_table_entry gtt_type_table[] = {
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
			GTT_TYPE_GGTT_PTE,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
};
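
/*
 * Example walk using the table above: shadowing a 4-level PPGTT starts
 * from GTT_TYPE_PPGTT_ROOT_L4_ENTRY, whose next_pt_type is
 * GTT_TYPE_PPGTT_PML4_PT; entries in that table are
 * GTT_TYPE_PPGTT_PML4_ENTRY, which lead to GTT_TYPE_PPGTT_PDP_PT, then
 * GTT_TYPE_PPGTT_PDE_PT, and finally GTT_TYPE_PPGTT_PTE_PT, whose
 * next_pt_type is GTT_TYPE_INVALID, the signal to stop descending.
 */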

static inline int get_next_pt_type(int type)
{
	return gtt_type_table[type].next_pt_type;
}

static inline int get_pt_type(int type)
{
	return gtt_type_table[type].pt_type;
}

static inline int get_entry_type(int type)
{
	return gtt_type_table[type].entry_type;
}

static inline int get_pse_type(int type)
{
	return gtt_type_table[type].pse_entry_type;
}

static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index)
{
	void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;

	return readq(addr);
}

static void gtt_invalidate(struct drm_i915_private *dev_priv)
{
	mmio_hw_access_pre(dev_priv);
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	mmio_hw_access_post(dev_priv);
}

static void write_pte64(struct drm_i915_private *dev_priv,
		unsigned long index, u64 pte)
{
	void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;

	writeq(pte, addr);
}

static inline int gtt_get_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (WARN_ON(info->gtt_entry_size != 8))
		return -EINVAL;

	if (hypervisor_access) {
		ret = intel_gvt_hypervisor_read_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
		if (WARN_ON(ret))
			return ret;
	} else if (!pt) {
		e->val64 = read_pte64(vgpu->gvt->dev_priv, index);
	} else {
		e->val64 = *((u64 *)pt + index);
	}
	return 0;
}

static inline int gtt_set_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (WARN_ON(info->gtt_entry_size != 8))
		return -EINVAL;

	if (hypervisor_access) {
		ret = intel_gvt_hypervisor_write_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
		if (WARN_ON(ret))
			return ret;
	} else if (!pt) {
		write_pte64(vgpu->gvt->dev_priv, index, e->val64);
	} else {
		*((u64 *)pt + index) = e->val64;
	}
	return 0;
}

#define GTT_HAW 46

#define ADDR_1G_MASK (((1UL << (GTT_HAW - 30)) - 1) << 30)
#define ADDR_2M_MASK (((1UL << (GTT_HAW - 21)) - 1) << 21)
#define ADDR_4K_MASK (((1UL << (GTT_HAW - 12)) - 1) << 12)

static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
{
	unsigned long pfn;

	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
		pfn = (e->val64 & ADDR_1G_MASK) >> 12;
	else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
		pfn = (e->val64 & ADDR_2M_MASK) >> 12;
	else
		pfn = (e->val64 & ADDR_4K_MASK) >> 12;
	return pfn;
}

static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
{
	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
		e->val64 &= ~ADDR_1G_MASK;
		pfn &= (ADDR_1G_MASK >> 12);
	} else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
		e->val64 &= ~ADDR_2M_MASK;
		pfn &= (ADDR_2M_MASK >> 12);
	} else {
		e->val64 &= ~ADDR_4K_MASK;
		pfn &= (ADDR_4K_MASK >> 12);
	}

	e->val64 |= (pfn << 12);
}

static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
{
	/* Entry doesn't have PSE bit. */
	if (get_pse_type(e->type) == GTT_TYPE_INVALID)
		return false;

	e->type = get_entry_type(e->type);
	if (!(e->val64 & BIT(7)))
		return false;

	e->type = get_pse_type(e->type);
	return true;
}
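
/*
 * BIT(7) in a gen8 PDE/PDP entry is the PSE (page size) bit. For
 * example, a GTT_TYPE_PPGTT_PDE_ENTRY with bit 7 set is reinterpreted
 * above as GTT_TYPE_PPGTT_PTE_2M_ENTRY, i.e. the entry maps a 2MB page
 * directly instead of pointing to a PTE page table.
 */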

static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
{
	/*
	 * i915 writes PDP root pointer registers without present bit,
	 * it also works, so we need to treat root pointer entry
	 * specifically.
	 */
	if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
			|| e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
		return (e->val64 != 0);
	else
		return (e->val64 & BIT(0));
}

static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
{
	e->val64 &= ~BIT(0);
}

static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
{
	e->val64 |= BIT(0);
}

/*
 * Per-platform GMA routines.
 */
static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
{
	unsigned long x = (gma >> I915_GTT_PAGE_SHIFT);

	trace_gma_index(__func__, gma, x);
	return x;
}

#define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
{ \
	unsigned long x = (exp); \
	trace_gma_index(__func__, gma, x); \
	return x; \
}

DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));

static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
	.get_entry = gtt_get_entry64,
	.set_entry = gtt_set_entry64,
	.clear_present = gtt_entry_clear_present,
	.set_present = gtt_entry_set_present,
	.test_present = gen8_gtt_test_present,
	.test_pse = gen8_gtt_test_pse,
	.get_pfn = gen8_gtt_get_pfn,
	.set_pfn = gen8_gtt_set_pfn,
};

static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
	.gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
	.gma_to_pte_index = gen8_gma_to_pte_index,
	.gma_to_pde_index = gen8_gma_to_pde_index,
	.gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
	.gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
	.gma_to_pml4_index = gen8_gma_to_pml4_index,
};
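
/*
 * Gen8 4-level GMA layout implied by the index helpers above:
 * bits [47:39] PML4 index, [38:30] PDP index, [29:21] PDE index,
 * [20:12] PTE index, [11:0] page offset. For example, gma 0x40201000
 * decodes to pml4 0, pdp 1, pde 1, pte 1, offset 0.
 */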

static int gtt_entry_p2m(struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *p,
		struct intel_gvt_gtt_entry *m)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	unsigned long gfn, mfn;

	*m = *p;

	if (!ops->test_present(p))
		return 0;

	gfn = ops->get_pfn(p);

	mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn);
	if (mfn == INTEL_GVT_INVALID_ADDR) {
		gvt_vgpu_err("fail to translate gfn: 0x%lx\n", gfn);
		return -ENXIO;
	}

	ops->set_pfn(m, mfn);
	return 0;
}

/*
 * MM helpers.
 */
static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index,
		bool guest)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT);

	entry->type = mm->ppgtt_mm.root_entry_type;
	pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps :
			   mm->ppgtt_mm.shadow_pdps,
			   entry, index, false, 0, mm->vgpu);

	pte_ops->test_pse(entry);
}

static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_get_root_entry(mm, entry, index, true);
}

static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_get_root_entry(mm, entry, index, false);
}

static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index,
		bool guest)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps :
			   mm->ppgtt_mm.shadow_pdps,
			   entry, index, false, 0, mm->vgpu);
}

static inline void ppgtt_set_guest_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_set_root_entry(mm, entry, index, true);
}

static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_set_root_entry(mm, entry, index, false);
}

static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	entry->type = GTT_TYPE_GGTT_PTE;
	pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
			   false, 0, mm->vgpu);
}

static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
			   false, 0, mm->vgpu);
}

static void ggtt_set_host_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu);
}
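
/*
 * Note the asymmetry above: guest GGTT entries live in the vGPU's
 * virtual_ggtt copy in memory, while ggtt_set_host_entry() passes a
 * NULL page table so that gtt_set_entry64() falls through to
 * write_pte64() and updates the real GGTT in hardware.
 */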

/*
 * PPGTT shadow page table helpers.
 */
static inline int ppgtt_spt_get_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	int ret;

	e->type = get_entry_type(type);

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return -EINVAL;

	ret = ops->get_entry(page_table, e, index, guest,
			spt->guest_page.track.gfn << I915_GTT_PAGE_SHIFT,
			spt->vgpu);
	if (ret)
		return ret;

	ops->test_pse(e);
	return 0;
}

static inline int ppgtt_spt_set_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return -EINVAL;

	return ops->set_entry(page_table, e, index, guest,
			spt->guest_page.track.gfn << I915_GTT_PAGE_SHIFT,
			spt->vgpu);
}

#define ppgtt_get_guest_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, NULL, \
		spt->guest_page_type, e, index, true)

#define ppgtt_set_guest_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, NULL, \
		spt->guest_page_type, e, index, true)

#define ppgtt_get_shadow_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)

#define ppgtt_set_shadow_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)
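
/*
 * The guest variants pass a NULL page table, which makes
 * ppgtt_spt_get_entry()/ppgtt_spt_set_entry() go through the
 * hypervisor GPA accessors against the tracked guest page, while the
 * shadow variants access the host-side shadow page directly through
 * its kernel mapping (shadow_page.vaddr).
 */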

/**
 * intel_vgpu_init_page_track - init a page track data structure
 * @vgpu: a vGPU
 * @t: a page track data structure
 * @gfn: guest memory page frame number
 * @handler: the function to be called when the target guest memory page
 * has been modified.
 * @data: private data associated with this page track
 *
 * This function is called when a user wants to prepare a page track data
 * structure to track a guest memory page.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_init_page_track(struct intel_vgpu *vgpu,
		struct intel_vgpu_page_track *t,
		unsigned long gfn,
		int (*handler)(void *, u64, void *, int),
		void *data)
{
	INIT_HLIST_NODE(&t->node);

	t->tracked = false;
	t->gfn = gfn;
	t->handler = handler;
	t->data = data;

	hash_add(vgpu->gtt.tracked_guest_page_hash_table, &t->node, t->gfn);
	return 0;
}

/**
 * intel_vgpu_clean_page_track - release a page track data structure
 * @vgpu: a vGPU
 * @t: a page track data structure
 *
 * This function is called before a user frees a page track data structure.
 */
void intel_vgpu_clean_page_track(struct intel_vgpu *vgpu,
		struct intel_vgpu_page_track *t)
{
	if (!hlist_unhashed(&t->node))
		hash_del(&t->node);

	if (t->tracked)
		intel_gvt_hypervisor_disable_page_track(vgpu, t);
}

/**
 * intel_vgpu_find_tracked_page - find a tracked guest page
 * @vgpu: a vGPU
 * @gfn: guest memory page frame number
 *
 * This function is called when the emulation layer wants to figure out if a
 * trapped GFN is a tracked guest page.
 *
 * Returns:
 * Pointer to page track data structure, NULL if not found.
 */
struct intel_vgpu_page_track *intel_vgpu_find_tracked_page(
		struct intel_vgpu *vgpu, unsigned long gfn)
{
	struct intel_vgpu_page_track *t;

	hash_for_each_possible(vgpu->gtt.tracked_guest_page_hash_table,
			t, node, gfn) {
		if (t->gfn == gfn)
			return t;
	}
	return NULL;
}

static int init_guest_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_guest_page *p,
		unsigned long gfn,
		int (*handler)(void *, u64, void *, int),
		void *data)
{
	p->oos_page = NULL;
	p->write_cnt = 0;

	return intel_vgpu_init_page_track(vgpu, &p->track, gfn, handler, data);
}

static int detach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page);

static void clean_guest_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_guest_page *p)
{
	if (p->oos_page)
		detach_oos_page(vgpu, p->oos_page);

	intel_vgpu_clean_page_track(vgpu, &p->track);
}

static inline int init_shadow_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_shadow_page *p, int type, bool hash)
{
	struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	daddr = dma_map_page(kdev, p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(kdev, daddr)) {
		gvt_vgpu_err("fail to map dma addr\n");
		return -EINVAL;
	}

	p->vaddr = page_address(p->page);
	p->type = type;

	INIT_HLIST_NODE(&p->node);

	p->mfn = daddr >> I915_GTT_PAGE_SHIFT;
	if (hash)
		hash_add(vgpu->gtt.shadow_page_hash_table, &p->node, p->mfn);
	return 0;
}

static inline void clean_shadow_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_shadow_page *p)
{
	struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev;

	dma_unmap_page(kdev, p->mfn << I915_GTT_PAGE_SHIFT, 4096,
			PCI_DMA_BIDIRECTIONAL);

	if (!hlist_unhashed(&p->node))
		hash_del(&p->node);
}

static inline struct intel_vgpu_shadow_page *find_shadow_page(
		struct intel_vgpu *vgpu, unsigned long mfn)
{
	struct intel_vgpu_shadow_page *p;

	hash_for_each_possible(vgpu->gtt.shadow_page_hash_table,
			p, node, mfn) {
		if (p->mfn == mfn)
			return p;
	}
	return NULL;
}

#define page_track_to_guest_page(ptr) \
	container_of(ptr, struct intel_vgpu_guest_page, track)

#define guest_page_to_ppgtt_spt(ptr) \
	container_of(ptr, struct intel_vgpu_ppgtt_spt, guest_page)

#define shadow_page_to_ppgtt_spt(ptr) \
	container_of(ptr, struct intel_vgpu_ppgtt_spt, shadow_page)

static void *alloc_spt(gfp_t gfp_mask)
{
	struct intel_vgpu_ppgtt_spt *spt;

	spt = kzalloc(sizeof(*spt), gfp_mask);
	if (!spt)
		return NULL;

	spt->shadow_page.page = alloc_page(gfp_mask);
	if (!spt->shadow_page.page) {
		kfree(spt);
		return NULL;
	}
	return spt;
}

static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	__free_page(spt->shadow_page.page);
	kfree(spt);
}

static void ppgtt_free_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
{
	trace_spt_free(spt->vgpu->id, spt, spt->shadow_page.type);

	clean_shadow_page(spt->vgpu, &spt->shadow_page);
	clean_guest_page(spt->vgpu, &spt->guest_page);
	list_del_init(&spt->post_shadow_list);

	free_spt(spt);
}

static void ppgtt_free_all_shadow_page(struct intel_vgpu *vgpu)
{
	struct hlist_node *n;
	struct intel_vgpu_shadow_page *sp;
	int i;

	hash_for_each_safe(vgpu->gtt.shadow_page_hash_table, i, n, sp, node)
		ppgtt_free_shadow_page(shadow_page_to_ppgtt_spt(sp));
}

static int ppgtt_handle_guest_write_page_table_bytes(
		struct intel_vgpu_guest_page *gpt,
		u64 pa, void *p_data, int bytes);

static int ppgtt_write_protection_handler(void *data, u64 pa,
		void *p_data, int bytes)
{
	struct intel_vgpu_page_track *t = data;
	struct intel_vgpu_guest_page *p = page_track_to_guest_page(t);

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	if (!t->tracked)
		return -EINVAL;

	return ppgtt_handle_guest_write_page_table_bytes(p,
			pa, p_data, bytes);
}

static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);

static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_shadow_page(
		struct intel_vgpu *vgpu, int type, unsigned long gfn)
{
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	int ret;

retry:
	spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
	if (!spt) {
		if (reclaim_one_ppgtt_mm(vgpu->gvt))
			goto retry;

		gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
		return ERR_PTR(-ENOMEM);
	}

	spt->vgpu = vgpu;
	spt->guest_page_type = type;
	atomic_set(&spt->refcount, 1);
	INIT_LIST_HEAD(&spt->post_shadow_list);

	/*
	 * TODO: the guest page type may differ from the shadow page type
	 * once PSE pages are supported.
	 */
	ret = init_shadow_page(vgpu, &spt->shadow_page, type, true);
	if (ret) {
		gvt_vgpu_err("fail to initialize shadow page for spt\n");
		goto err;
	}

	ret = init_guest_page(vgpu, &spt->guest_page,
			gfn, ppgtt_write_protection_handler, NULL);
	if (ret) {
		gvt_vgpu_err("fail to initialize guest page for spt\n");
		goto err;
	}

	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
	return spt;
err:
	ppgtt_free_shadow_page(spt);
	return ERR_PTR(ret);
}

static struct intel_vgpu_ppgtt_spt *ppgtt_find_shadow_page(
		struct intel_vgpu *vgpu, unsigned long mfn)
{
	struct intel_vgpu_shadow_page *p = find_shadow_page(vgpu, mfn);

	if (p)
		return shadow_page_to_ppgtt_spt(p);

	gvt_vgpu_err("fail to find ppgtt shadow page: 0x%lx\n", mfn);
	return NULL;
}

#define pt_entry_size_shift(spt) \
	((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)

#define pt_entries(spt) \
	(I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))

#define for_each_present_guest_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); i++) \
		if (!ppgtt_get_guest_entry(spt, e, i) && \
		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))

#define for_each_present_shadow_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); i++) \
		if (!ppgtt_get_shadow_entry(spt, e, i) && \
		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))

static void ppgtt_get_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
{
	int v = atomic_read(&spt->refcount);

	trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));

	atomic_inc(&spt->refcount);
}

static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt);

static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu,
		struct intel_gvt_gtt_entry *e)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;
	intel_gvt_gtt_type_t cur_pt_type;

	if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(e->type))))
		return -EINVAL;

	if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
			&& e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
		cur_pt_type = get_next_pt_type(e->type) + 1;
		if (ops->get_pfn(e) ==
				vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
			return 0;
	}
	s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e));
	if (!s) {
		gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
				ops->get_pfn(e));
		return -ENXIO;
	}
	return ppgtt_invalidate_shadow_page(s);
}

static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry e;
	unsigned long index;
	int ret;
	int v = atomic_read(&spt->refcount);

	trace_spt_change(spt->vgpu->id, "die", spt,
			spt->guest_page.track.gfn, spt->shadow_page.type);

	trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));

	if (atomic_dec_return(&spt->refcount) > 0)
		return 0;

	if (gtt_type_is_pte_pt(spt->shadow_page.type))
		goto release;

	for_each_present_shadow_entry(spt, &e, index) {
		if (!gtt_type_is_pt(get_next_pt_type(e.type))) {
			gvt_vgpu_err("GVT doesn't support pse bit for now\n");
			return -EINVAL;
		}
		ret = ppgtt_invalidate_shadow_page_by_shadow_entry(
				spt->vgpu, &e);
		if (ret)
			goto fail;
	}
release:
	trace_spt_change(spt->vgpu->id, "release", spt,
			spt->guest_page.track.gfn, spt->shadow_page.type);
	ppgtt_free_shadow_page(spt);
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
			spt, e.val64, e.type);
	return ret;
}

static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt);

static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry(
		struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s = NULL;
	struct intel_vgpu_guest_page *g;
	struct intel_vgpu_page_track *t;
	int ret;

	if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(we->type)))) {
		ret = -EINVAL;
		goto fail;
	}

	t = intel_vgpu_find_tracked_page(vgpu, ops->get_pfn(we));
	if (t) {
		g = page_track_to_guest_page(t);
		s = guest_page_to_ppgtt_spt(g);
		ppgtt_get_shadow_page(s);
	} else {
		int type = get_next_pt_type(we->type);

		s = ppgtt_alloc_shadow_page(vgpu, type, ops->get_pfn(we));
		if (IS_ERR(s)) {
			ret = PTR_ERR(s);
			goto fail;
		}

		ret = intel_gvt_hypervisor_enable_page_track(vgpu,
				&s->guest_page.track);
		if (ret)
			goto fail;

		ret = ppgtt_populate_shadow_page(s);
		if (ret)
			goto fail;

		trace_spt_change(vgpu->id, "new", s, s->guest_page.track.gfn,
				s->shadow_page.type);
	}
	return s;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
			s, we->val64, we->type);
	return ERR_PTR(ret);
}
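
/*
 * Get-or-create semantics: if the guest page table referenced by the
 * guest entry is already tracked, its existing shadow page is reused
 * and only the refcount is bumped; otherwise a new shadow page is
 * allocated, the guest page is write-protected and the shadow is
 * populated from it.
 */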

static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
		struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
{
	struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;

	se->type = ge->type;
	se->val64 = ge->val64;

	ops->set_pfn(se, s->shadow_page.mfn);
}

static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;
	struct intel_gvt_gtt_entry se, ge;
	unsigned long gfn, i;
	int ret;

	trace_spt_change(spt->vgpu->id, "born", spt,
			spt->guest_page.track.gfn, spt->shadow_page.type);

	if (gtt_type_is_pte_pt(spt->shadow_page.type)) {
		for_each_present_guest_entry(spt, &ge, i) {
			gfn = ops->get_pfn(&ge);
			if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn) ||
			    gtt_entry_p2m(vgpu, &ge, &se))
				ops->set_pfn(&se, gvt->gtt.scratch_mfn);
			ppgtt_set_shadow_entry(spt, &se, i);
		}
		return 0;
	}

	for_each_present_guest_entry(spt, &ge, i) {
		if (!gtt_type_is_pt(get_next_pt_type(ge.type))) {
			gvt_vgpu_err("GVT doesn't support pse bit now\n");
			ret = -EINVAL;
			goto fail;
		}

		s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge);
		if (IS_ERR(s)) {
			ret = PTR_ERR(s);
			goto fail;
		}
		ppgtt_get_shadow_entry(spt, &se, i);
		ppgtt_generate_shadow_entry(&se, s, &ge);
		ppgtt_set_shadow_entry(spt, &se, i);
	}
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
			spt, ge.val64, ge.type);
	return ret;
}

static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt,
		struct intel_gvt_gtt_entry *se, unsigned long index)
{
	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
	struct intel_vgpu_shadow_page *sp = &spt->shadow_page;
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	int ret;

	trace_gpt_change(spt->vgpu->id, "remove", spt, sp->type, se->val64,
			 index);

	if (!ops->test_present(se))
		return 0;

	if (ops->get_pfn(se) == vgpu->gtt.scratch_pt[sp->type].page_mfn)
		return 0;

	if (gtt_type_is_pt(get_next_pt_type(se->type))) {
		struct intel_vgpu_ppgtt_spt *s =
			ppgtt_find_shadow_page(vgpu, ops->get_pfn(se));
		if (!s) {
			gvt_vgpu_err("fail to find guest page\n");
			ret = -ENXIO;
			goto fail;
		}
		ret = ppgtt_invalidate_shadow_page(s);
		if (ret)
			goto fail;
	}
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
			spt, se->val64, se->type);
	return ret;
}

static int ppgtt_handle_guest_entry_add(struct intel_vgpu_guest_page *gpt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
	struct intel_vgpu_shadow_page *sp = &spt->shadow_page;
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry m;
	struct intel_vgpu_ppgtt_spt *s;
	int ret;

	trace_gpt_change(spt->vgpu->id, "add", spt, sp->type,
			we->val64, index);

	if (gtt_type_is_pt(get_next_pt_type(we->type))) {
		s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, we);
		if (IS_ERR(s)) {
			ret = PTR_ERR(s);
			goto fail;
		}
		ppgtt_get_shadow_entry(spt, &m, index);
		ppgtt_generate_shadow_entry(&m, s, we);
		ppgtt_set_shadow_entry(spt, &m, index);
	} else {
		ret = gtt_entry_p2m(vgpu, we, &m);
		if (ret)
			goto fail;
		ppgtt_set_shadow_entry(spt, &m, index);
	}
	return 0;
fail:
	gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
			spt, we->val64, we->type);
	return ret;
}

static int sync_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt =
		guest_page_to_ppgtt_spt(oos_page->guest_page);
	struct intel_gvt_gtt_entry old, new, m;
	int index;
	int ret;

	trace_oos_change(vgpu->id, "sync", oos_page->id,
			oos_page->guest_page, spt->guest_page_type);

	old.type = new.type = get_entry_type(spt->guest_page_type);
	old.val64 = new.val64 = 0;

	for (index = 0; index < (I915_GTT_PAGE_SIZE >>
			info->gtt_entry_size_shift); index++) {
		ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
		ops->get_entry(NULL, &new, index, true,
			oos_page->guest_page->track.gfn << PAGE_SHIFT, vgpu);

		if (old.val64 == new.val64
				&& !test_and_clear_bit(index, spt->post_shadow_bitmap))
			continue;

		trace_oos_sync(vgpu->id, oos_page->id,
				oos_page->guest_page, spt->guest_page_type,
				new.val64, index);

		ret = gtt_entry_p2m(vgpu, &new, &m);
		if (ret)
			return ret;

		ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
		ppgtt_set_shadow_entry(spt, &m, index);
	}

	oos_page->guest_page->write_cnt = 0;
	list_del_init(&spt->post_shadow_list);
	return 0;
}

static int detach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_vgpu_ppgtt_spt *spt =
		guest_page_to_ppgtt_spt(oos_page->guest_page);

	trace_oos_change(vgpu->id, "detach", oos_page->id,
			oos_page->guest_page, spt->guest_page_type);

	oos_page->guest_page->write_cnt = 0;
	oos_page->guest_page->oos_page = NULL;
	oos_page->guest_page = NULL;

	list_del_init(&oos_page->vm_list);
	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);

	return 0;
}

static int attach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page,
		struct intel_vgpu_guest_page *gpt)
{
	struct intel_gvt *gvt = vgpu->gvt;
	int ret;

	ret = intel_gvt_hypervisor_read_gpa(vgpu,
			gpt->track.gfn << I915_GTT_PAGE_SHIFT,
			oos_page->mem, I915_GTT_PAGE_SIZE);
	if (ret)
		return ret;

	oos_page->guest_page = gpt;
	gpt->oos_page = oos_page;

	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);

	trace_oos_change(vgpu->id, "attach", gpt->oos_page->id,
			gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type);
	return 0;
}

static int ppgtt_set_guest_page_sync(struct intel_vgpu *vgpu,
		struct intel_vgpu_guest_page *gpt)
{
	int ret;

	ret = intel_gvt_hypervisor_enable_page_track(vgpu, &gpt->track);
	if (ret)
		return ret;

	trace_oos_change(vgpu->id, "set page sync", gpt->oos_page->id,
			gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type);

	list_del_init(&gpt->oos_page->vm_list);
	return sync_oos_page(vgpu, gpt->oos_page);
}

static int ppgtt_allocate_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_guest_page *gpt)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_oos_page *oos_page = gpt->oos_page;
	int ret;

	WARN(oos_page, "shadow PPGTT page already has an oos page\n");

	if (list_empty(&gtt->oos_page_free_list_head)) {
		oos_page = container_of(gtt->oos_page_use_list_head.next,
			struct intel_vgpu_oos_page, list);
		ret = ppgtt_set_guest_page_sync(vgpu, oos_page->guest_page);
		if (ret)
			return ret;
		ret = detach_oos_page(vgpu, oos_page);
		if (ret)
			return ret;
	} else
		oos_page = container_of(gtt->oos_page_free_list_head.next,
			struct intel_vgpu_oos_page, list);
	return attach_oos_page(vgpu, oos_page, gpt);
}

static int ppgtt_set_guest_page_oos(struct intel_vgpu *vgpu,
		struct intel_vgpu_guest_page *gpt)
{
	struct intel_vgpu_oos_page *oos_page = gpt->oos_page;

	if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n"))
		return -EINVAL;

	trace_oos_change(vgpu->id, "set page out of sync", gpt->oos_page->id,
			gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type);

	list_add_tail(&oos_page->vm_list, &vgpu->gtt.oos_page_list_head);
	return intel_gvt_hypervisor_disable_page_track(vgpu, &gpt->track);
}
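
/*
 * OOS page lifecycle: attach_oos_page() snapshots the guest page table
 * into oos_page->mem, and ppgtt_set_guest_page_oos() then drops write
 * protection, so further guest PTE writes are no longer trapped.
 * Before a guest workload is submitted, ppgtt_set_guest_page_sync()
 * re-enables write protection and sync_oos_page() diffs the snapshot
 * against the current guest page to bring the shadow table up to date.
 */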

/**
 * intel_vgpu_sync_oos_pages - sync all the out-of-sync shadow pages for a vGPU
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to the host,
 * to sync all the out-of-sync shadow pages for a vGPU.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_oos_page *oos_page;
	int ret;

	if (!enable_out_of_sync)
		return 0;

	list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
		oos_page = container_of(pos,
				struct intel_vgpu_oos_page, vm_list);
		ret = ppgtt_set_guest_page_sync(vgpu, oos_page->guest_page);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * The heart of PPGTT shadow page table.
 */
static int ppgtt_handle_guest_write_page_table(
		struct intel_vgpu_guest_page *gpt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
	struct intel_vgpu *vgpu = spt->vgpu;
	int type = spt->shadow_page.type;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry se;

	int ret;
	int new_present;

	new_present = ops->test_present(we);

	/*
	 * Add the new entry first and then remove the old one; this
	 * guarantees the ppgtt table stays valid during the window
	 * between the add and the removal.
	 */
	ppgtt_get_shadow_entry(spt, &se, index);

	if (new_present) {
		ret = ppgtt_handle_guest_entry_add(gpt, we, index);
		if (ret)
			goto fail;
	}

	ret = ppgtt_handle_guest_entry_removal(gpt, &se, index);
	if (ret)
		goto fail;

	if (!new_present) {
		ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
		ppgtt_set_shadow_entry(spt, &se, index);
	}

	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
			spt, we->val64, we->type);
	return ret;
}

static inline bool can_do_out_of_sync(struct intel_vgpu_guest_page *gpt)
{
	return enable_out_of_sync
		&& gtt_type_is_pte_pt(
			guest_page_to_ppgtt_spt(gpt)->guest_page_type)
		&& gpt->write_cnt >= 2;
}
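
/*
 * OOS is only attempted for last-level (PTE) page tables that have
 * already taken at least two trapped writes: a single write does not
 * justify dropping write protection, and non-leaf tables are always
 * kept in sync since their entries reference other shadow pages.
 */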

static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
		unsigned long index)
{
	set_bit(index, spt->post_shadow_bitmap);
	if (!list_empty(&spt->post_shadow_list))
		return;

	list_add_tail(&spt->post_shadow_list,
			&spt->vgpu->gtt.post_shadow_list_head);
}

/**
 * intel_vgpu_flush_post_shadow - flush the post shadow transactions
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to host,
 * to flush all the post shadows for a vGPU.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_ppgtt_spt *spt;
	struct intel_gvt_gtt_entry ge;
	unsigned long index;
	int ret;

	list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
		spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
				post_shadow_list);

		for_each_set_bit(index, spt->post_shadow_bitmap,
				GTT_ENTRY_NUM_IN_ONE_PAGE) {
			ppgtt_get_guest_entry(spt, &ge, index);

			ret = ppgtt_handle_guest_write_page_table(
					&spt->guest_page, &ge, index);
			if (ret)
				return ret;
			clear_bit(index, spt->post_shadow_bitmap);
		}
		list_del_init(&spt->post_shadow_list);
	}
	return 0;
}

static int ppgtt_handle_guest_write_page_table_bytes(
		struct intel_vgpu_guest_page *gpt,
		u64 pa, void *p_data, int bytes)
{
	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	struct intel_gvt_gtt_entry we, se;
	unsigned long index;
	int ret;

	index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;

	ppgtt_get_guest_entry(spt, &we, index);

	ops->test_pse(&we);

	if (bytes == info->gtt_entry_size) {
		ret = ppgtt_handle_guest_write_page_table(gpt, &we, index);
		if (ret)
			return ret;
	} else {
		if (!test_bit(index, spt->post_shadow_bitmap)) {
			int type = spt->shadow_page.type;

			ppgtt_get_shadow_entry(spt, &se, index);
			ret = ppgtt_handle_guest_entry_removal(gpt, &se, index);
			if (ret)
				return ret;
			ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
			ppgtt_set_shadow_entry(spt, &se, index);
		}
		ppgtt_set_post_shadow(spt, index);
	}

	if (!enable_out_of_sync)
		return 0;

	gpt->write_cnt++;

	if (gpt->oos_page)
		ops->set_entry(gpt->oos_page->mem, &we, index,
				false, 0, vgpu);

	if (can_do_out_of_sync(gpt)) {
		if (!gpt->oos_page)
			ppgtt_allocate_oos_page(vgpu, gpt);

		ret = ppgtt_set_guest_page_oos(vgpu, gpt);
		if (ret < 0)
			return ret;
	}
	return 0;
}
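
/*
 * A partial (non-entry-sized) write leaves the guest entry in a
 * transient state, so the shadow entry is pointed at the scratch page
 * and the index is queued on the post_shadow list; the entry is
 * re-shadowed later from intel_vgpu_flush_post_shadow(), once the
 * guest has finished composing it.
 */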

static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
	struct intel_gvt_gtt_entry se;
	int index;

	if (!mm->ppgtt_mm.shadowed)
		return;

	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
		ppgtt_get_shadow_root_entry(mm, &se, index);

		if (!ops->test_present(&se))
			continue;

		ppgtt_invalidate_shadow_page_by_shadow_entry(vgpu, &se);
		se.val64 = 0;
		ppgtt_set_shadow_root_entry(mm, &se, index);

		trace_gpt_change(vgpu->id, "destroy root pointer",
				 NULL, se.type, se.val64, index);
	}

	mm->ppgtt_mm.shadowed = false;
}

static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
	struct intel_vgpu_ppgtt_spt *spt;
	struct intel_gvt_gtt_entry ge, se;
	int index, ret;

	if (mm->ppgtt_mm.shadowed)
		return 0;

	mm->ppgtt_mm.shadowed = true;

	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
		ppgtt_get_guest_root_entry(mm, &ge, index);

		if (!ops->test_present(&ge))
			continue;

		trace_gpt_change(vgpu->id, __func__, NULL,
				 ge.type, ge.val64, index);

		spt = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge);
		if (IS_ERR(spt)) {
			gvt_vgpu_err("fail to populate guest root pointer\n");
			ret = PTR_ERR(spt);
			goto fail;
		}
		ppgtt_generate_shadow_entry(&se, spt, &ge);
		ppgtt_set_shadow_root_entry(mm, &se, index);

		trace_gpt_change(vgpu->id, "populate root pointer",
				 NULL, se.type, se.val64, index);
	}

	return 0;
fail:
	invalidate_ppgtt_mm(mm);
	return ret;
}

static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_mm *mm;

	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
1582 if (!mm)
1583 return NULL;
1584
1585 mm->vgpu = vgpu;
1586 kref_init(&mm->ref);
1587 atomic_set(&mm->pincount, 0);
1588
1589 return mm;
1590}
1591
1592static void vgpu_free_mm(struct intel_vgpu_mm *mm)
1593{
1594 kfree(mm);
1595}
1596
Zhi Wang2707e442016-03-28 23:23:16 +08001597/**
Changbin Duede9d0c2018-01-30 19:19:40 +08001598 * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU
Zhi Wang2707e442016-03-28 23:23:16 +08001599 * @vgpu: a vGPU
Changbin Duede9d0c2018-01-30 19:19:40 +08001600 * @root_entry_type: ppgtt root entry type
1601 * @pdps: guest pdps.
Zhi Wang2707e442016-03-28 23:23:16 +08001602 *
Changbin Duede9d0c2018-01-30 19:19:40 +08001603 * This function is used to create a ppgtt mm object for a vGPU.
Zhi Wang2707e442016-03-28 23:23:16 +08001604 *
1605 * Returns:
1606 * pointer to mm object on success, ERR_PTR with a negative error code if failed.
1607 */
Changbin Duede9d0c2018-01-30 19:19:40 +08001608struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
1609 intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
Zhi Wang2707e442016-03-28 23:23:16 +08001610{
1611 struct intel_gvt *gvt = vgpu->gvt;
Zhi Wang2707e442016-03-28 23:23:16 +08001612 struct intel_vgpu_mm *mm;
1613 int ret;
1614
Changbin Duede9d0c2018-01-30 19:19:40 +08001615 mm = vgpu_alloc_mm(vgpu);
1616 if (!mm)
1617 return ERR_PTR(-ENOMEM);
Zhi Wang2707e442016-03-28 23:23:16 +08001618
Changbin Duede9d0c2018-01-30 19:19:40 +08001619 mm->type = INTEL_GVT_MM_PPGTT;
Zhi Wang2707e442016-03-28 23:23:16 +08001620
Changbin Duede9d0c2018-01-30 19:19:40 +08001621 GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY &&
1622 root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY);
1623 mm->ppgtt_mm.root_entry_type = root_entry_type;
Zhi Wang2707e442016-03-28 23:23:16 +08001624
Changbin Duede9d0c2018-01-30 19:19:40 +08001625 INIT_LIST_HEAD(&mm->ppgtt_mm.list);
1626 INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
Zhi Wang2707e442016-03-28 23:23:16 +08001627
Changbin Duede9d0c2018-01-30 19:19:40 +08001628 if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
1629 mm->ppgtt_mm.guest_pdps[0] = pdps[0];
1630 else
1631 memcpy(mm->ppgtt_mm.guest_pdps, pdps,
1632 sizeof(mm->ppgtt_mm.guest_pdps));
Zhi Wang2707e442016-03-28 23:23:16 +08001633
Changbin Duede9d0c2018-01-30 19:19:40 +08001634 ret = shadow_ppgtt_mm(mm);
Zhi Wang2707e442016-03-28 23:23:16 +08001635 if (ret) {
Changbin Duede9d0c2018-01-30 19:19:40 +08001636 gvt_vgpu_err("failed to shadow ppgtt mm\n");
1637 vgpu_free_mm(mm);
1638 return ERR_PTR(ret);
Zhi Wang2707e442016-03-28 23:23:16 +08001639 }
1640
Changbin Duede9d0c2018-01-30 19:19:40 +08001641 list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);
1642 list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
Zhi Wang2707e442016-03-28 23:23:16 +08001643 return mm;
Changbin Duede9d0c2018-01-30 19:19:40 +08001644}
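/*
 * Usage sketch (example_create_l4_mm() is hypothetical): a 4-level
 * PPGTT consumes only pdps[0], which holds the guest PML4 pointer; the
 * 3-level case passes all four PDP entries in pdps[] instead.
 */
static struct intel_vgpu_mm *example_create_l4_mm(struct intel_vgpu *vgpu,
		u64 pml4)
{
	u64 pdps[4] = { pml4 };	/* four slots also cover the L3 layout */

	return intel_vgpu_create_ppgtt_mm(vgpu, GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
					  pdps);
}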
1645
1646static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
1647{
1648 struct intel_vgpu_mm *mm;
1649 unsigned long nr_entries;
1650
1651 mm = vgpu_alloc_mm(vgpu);
1652 if (!mm)
1653 return ERR_PTR(-ENOMEM);
1654
1655 mm->type = INTEL_GVT_MM_GGTT;
1656
1657 nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
1658 mm->ggtt_mm.virtual_ggtt = vzalloc(nr_entries *
1659 vgpu->gvt->device_info.gtt_entry_size);
1660 if (!mm->ggtt_mm.virtual_ggtt) {
1661 vgpu_free_mm(mm);
1662 return ERR_PTR(-ENOMEM);
1663 }
1664
1665 return mm;
1666}
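/*
 * Sizing note (example figures, not a fixed limit): with gen8's 8-byte
 * entries, a 4GB total graphics memory space yields (4GB >> 12) == 1M
 * GGTT entries, i.e. an 8MB vzalloc'ed buffer backing virtual_ggtt.
 */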
1667
1668/**
Changbin Du1bc25852018-01-30 19:19:41 +08001669 * _intel_vgpu_mm_release - destroy a mm object
Changbin Duede9d0c2018-01-30 19:19:40 +08001670 * @mm_ref: a kref object
1671 *
1672 * This function is used to destroy a mm object for a vGPU
1673 *
1674 */
Changbin Du1bc25852018-01-30 19:19:41 +08001675void _intel_vgpu_mm_release(struct kref *mm_ref)
Changbin Duede9d0c2018-01-30 19:19:40 +08001676{
1677 struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
1678
1679 if (GEM_WARN_ON(atomic_read(&mm->pincount)))
1680 gvt_err("vgpu mm pin count bug detected\n");
1681
1682 if (mm->type == INTEL_GVT_MM_PPGTT) {
1683 list_del(&mm->ppgtt_mm.list);
1684 list_del(&mm->ppgtt_mm.lru_list);
1685 invalidate_ppgtt_mm(mm);
1686 } else {
1687 vfree(mm->ggtt_mm.virtual_ggtt);
1688 }
1689
1690 vgpu_free_mm(mm);
Zhi Wang2707e442016-03-28 23:23:16 +08001691}
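/*
 * The matching get/put helpers are expected to live in the header as
 * thin kref wrappers, roughly like this sketch (the exact definitions
 * are in gtt.h; the names match the callers later in this file):
 *
 *	static inline void intel_vgpu_mm_get(struct intel_vgpu_mm *mm)
 *	{
 *		kref_get(&mm->ref);
 *	}
 *
 *	static inline void intel_vgpu_mm_put(struct intel_vgpu_mm *mm)
 *	{
 *		kref_put(&mm->ref, _intel_vgpu_mm_release);
 *	}
 */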
1692
1693/**
1694 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
1695 * @mm: a vGPU mm object
1696 *
1697 * This function is called when a user is done with a vGPU mm object
1698 */
1699void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
1700{
Zhi Wang2707e442016-03-28 23:23:16 +08001701 atomic_dec(&mm->pincount);
1702}
1703
1704/**
1705 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
1706 * @mm: a vGPU mm object
1707 *
1708 * This function is called when a user wants to use a vGPU mm object. If this
1709 * mm object hasn't been shadowed yet, the shadow will be populated at this
1710 * time.
1711 *
1712 * Returns:
1713 * Zero on success, negative error code if failed.
1714 */
1715int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
1716{
1717 int ret;
1718
Changbin Duede9d0c2018-01-30 19:19:40 +08001719 atomic_inc(&mm->pincount);
Zhi Wang2707e442016-03-28 23:23:16 +08001720
Changbin Duede9d0c2018-01-30 19:19:40 +08001721 if (mm->type == INTEL_GVT_MM_PPGTT) {
1722 ret = shadow_ppgtt_mm(mm);
Zhi Wang2707e442016-03-28 23:23:16 +08001723 if (ret)
1724 return ret;
Changbin Duede9d0c2018-01-30 19:19:40 +08001725
1726 list_move_tail(&mm->ppgtt_mm.lru_list,
1727 &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
1728
Zhi Wang2707e442016-03-28 23:23:16 +08001729 }
1730
Zhi Wang2707e442016-03-28 23:23:16 +08001731 return 0;
1732}
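/*
 * Pairing sketch (example_run_on_mm() is hypothetical): keep the mm
 * pinned for as long as the hardware may walk its shadow tables, then
 * unpin it so reclaim_one_ppgtt_mm() below may invalidate it again.
 */
static int example_run_on_mm(struct intel_vgpu_mm *mm)
{
	int ret;

	ret = intel_vgpu_pin_mm(mm);	/* shadows the PPGTT if needed */
	if (ret)
		return ret;

	/* ... submit and retire work that uses this address space ... */

	intel_vgpu_unpin_mm(mm);
	return 0;
}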
1733
Changbin Duede9d0c2018-01-30 19:19:40 +08001734static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
Zhi Wang2707e442016-03-28 23:23:16 +08001735{
1736 struct intel_vgpu_mm *mm;
1737 struct list_head *pos, *n;
1738
Changbin Duede9d0c2018-01-30 19:19:40 +08001739 list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
1740 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);
Zhi Wang2707e442016-03-28 23:23:16 +08001741
Zhi Wang2707e442016-03-28 23:23:16 +08001742 if (atomic_read(&mm->pincount))
1743 continue;
1744
Changbin Duede9d0c2018-01-30 19:19:40 +08001745 list_del_init(&mm->ppgtt_mm.lru_list);
1746 invalidate_ppgtt_mm(mm);
Zhi Wang2707e442016-03-28 23:23:16 +08001747 return 1;
1748 }
1749 return 0;
1750}
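/*
 * LRU note: intel_vgpu_pin_mm() moves an mm to the tail of
 * ppgtt_mm_lru_list_head, so the scan above starts with the least
 * recently pinned candidates and skips anything still pinned.
 */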
1751
1752/*
1753 * GMA translation APIs.
1754 */
1755static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
1756 struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
1757{
1758 struct intel_vgpu *vgpu = mm->vgpu;
1759 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1760 struct intel_vgpu_ppgtt_spt *s;
1761
Zhi Wang2707e442016-03-28 23:23:16 +08001762 s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e));
1763 if (!s)
1764 return -ENXIO;
1765
1766 if (!guest)
1767 ppgtt_get_shadow_entry(s, e, index);
1768 else
1769 ppgtt_get_guest_entry(s, e, index);
1770 return 0;
1771}
1772
1773/**
1774 * intel_vgpu_gma_to_gpa - translate a gma to GPA
1775 * @mm: mm object. could be a PPGTT or GGTT mm object
1776 * @gma: graphics memory address in this mm object
1777 *
1778 * This function is used to translate a graphics memory address in specific
1779 * graphics memory space to guest physical address.
1780 *
1781 * Returns:
1782 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
1783 */
1784unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
1785{
1786 struct intel_vgpu *vgpu = mm->vgpu;
1787 struct intel_gvt *gvt = vgpu->gvt;
1788 struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
1789 struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
1790 unsigned long gpa = INTEL_GVT_INVALID_ADDR;
1791 unsigned long gma_index[4];
1792 struct intel_gvt_gtt_entry e;
Changbin Duede9d0c2018-01-30 19:19:40 +08001793 int i, levels = 0;
Zhi Wang2707e442016-03-28 23:23:16 +08001794 int ret;
1795
Changbin Duede9d0c2018-01-30 19:19:40 +08001796 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT &&
1797 mm->type != INTEL_GVT_MM_PPGTT);
Zhi Wang2707e442016-03-28 23:23:16 +08001798
1799 if (mm->type == INTEL_GVT_MM_GGTT) {
1800 if (!vgpu_gmadr_is_valid(vgpu, gma))
1801 goto err;
1802
Changbin Duede9d0c2018-01-30 19:19:40 +08001803 ggtt_get_guest_entry(mm, &e,
1804 gma_ops->gma_to_ggtt_pte_index(gma));
1805
Zhi Wang9556e112017-10-10 13:51:32 +08001806 gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
1807 + (gma & ~I915_GTT_PAGE_MASK);
Zhi Wang2707e442016-03-28 23:23:16 +08001808
1809 trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
Changbin Duede9d0c2018-01-30 19:19:40 +08001810 } else {
1811 switch (mm->ppgtt_mm.root_entry_type) {
1812 case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
1813 ppgtt_get_shadow_root_entry(mm, &e, 0);
Zhi Wang2707e442016-03-28 23:23:16 +08001814
Changbin Duede9d0c2018-01-30 19:19:40 +08001815 gma_index[0] = gma_ops->gma_to_pml4_index(gma);
1816 gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
1817 gma_index[2] = gma_ops->gma_to_pde_index(gma);
1818 gma_index[3] = gma_ops->gma_to_pte_index(gma);
1819 levels = 4;
1820 break;
1821 case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
1822 ppgtt_get_shadow_root_entry(mm, &e,
1823 gma_ops->gma_to_l3_pdp_index(gma));
Zhi Wang2707e442016-03-28 23:23:16 +08001824
Changbin Duede9d0c2018-01-30 19:19:40 +08001825 gma_index[0] = gma_ops->gma_to_pde_index(gma);
1826 gma_index[1] = gma_ops->gma_to_pte_index(gma);
1827 levels = 2;
1828 break;
1829 default:
1830 GEM_BUG_ON(1);
Changbin Du4b2dbbc2017-08-02 15:06:37 +08001831 }
Changbin Duede9d0c2018-01-30 19:19:40 +08001832
1833 /* walk the shadow page table and get gpa from guest entry */
1834 for (i = 0; i < levels; i++) {
1835 ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
1836 (i == levels - 1));
1837 if (ret)
1838 goto err;
1839
1840 if (!pte_ops->test_present(&e)) {
1841 gvt_dbg_core("GMA 0x%lx is not present\n", gma);
1842 goto err;
1843 }
1844 }
1845
1846 gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) +
1847 (gma & ~I915_GTT_PAGE_MASK);
1848 trace_gma_translate(vgpu->id, "ppgtt", 0,
1849 mm->ppgtt_mm.root_entry_type, gma, gpa);
Zhi Wang2707e442016-03-28 23:23:16 +08001850 }
1851
Zhi Wang2707e442016-03-28 23:23:16 +08001852 return gpa;
1853err:
Tina Zhang695fbc02017-03-10 04:26:53 -05001854 gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
Zhi Wang2707e442016-03-28 23:23:16 +08001855 return INTEL_GVT_INVALID_ADDR;
1856}
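/*
 * Usage sketch (example_gma_lookup() is hypothetical): translate a GGTT
 * graphics memory address and check for the sentinel value before
 * touching guest memory at the returned GPA.
 */
static int example_gma_lookup(struct intel_vgpu *vgpu, unsigned long gma)
{
	unsigned long gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, gma);

	if (gpa == INTEL_GVT_INVALID_ADDR)
		return -EFAULT;

	/* ... access guest memory at gpa through the hypervisor ... */
	return 0;
}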
1857
1858static int emulate_gtt_mmio_read(struct intel_vgpu *vgpu,
1859 unsigned int off, void *p_data, unsigned int bytes)
1860{
1861 struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
1862 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1863 unsigned long index = off >> info->gtt_entry_size_shift;
1864 struct intel_gvt_gtt_entry e;
1865
1866 if (bytes != 4 && bytes != 8)
1867 return -EINVAL;
1868
1869 ggtt_get_guest_entry(ggtt_mm, &e, index);
1870 memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
1871 bytes);
1872 return 0;
1873}
1874
1875/**
1876 * intel_vgpu_emulate_gtt_mmio_read - emulate GTT MMIO register read
1877 * @vgpu: a vGPU
1878 * @off: register offset
1879 * @p_data: data will be returned to guest
1880 * @bytes: data length
1881 *
1882 * This function is used to emulate the GTT MMIO register read
1883 *
1884 * Returns:
1885 * Zero on success, error code if failed.
1886 */
1887int intel_vgpu_emulate_gtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
1888 void *p_data, unsigned int bytes)
1889{
1890 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1891 int ret;
1892
1893 if (bytes != 4 && bytes != 8)
1894 return -EINVAL;
1895
1896 off -= info->gtt_start_offset;
1897 ret = emulate_gtt_mmio_read(vgpu, off, p_data, bytes);
1898 return ret;
1899}
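/*
 * Worked example of the offset math (illustrative numbers): with
 * 8-byte entries, a 4-byte guest read at gtt_start_offset + 0x10
 * becomes off == 0x10 after the subtraction above and selects GGTT
 * entry index 0x10 >> 3 == 2; the (off & 7) remainder picks which part
 * of the 8-byte entry is copied out.
 */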
1900
1901static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
1902 void *p_data, unsigned int bytes)
1903{
1904 struct intel_gvt *gvt = vgpu->gvt;
1905 const struct intel_gvt_device_info *info = &gvt->device_info;
1906 struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
1907 struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
1908 unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
Hang Yuancc753fb2017-12-22 18:06:31 +08001909 unsigned long gma, gfn;
Zhi Wang2707e442016-03-28 23:23:16 +08001910 struct intel_gvt_gtt_entry e, m;
1911 int ret;
1912
1913 if (bytes != 4 && bytes != 8)
1914 return -EINVAL;
1915
Zhi Wang9556e112017-10-10 13:51:32 +08001916 gma = g_gtt_index << I915_GTT_PAGE_SHIFT;
Zhi Wang2707e442016-03-28 23:23:16 +08001917
1918 /* the VM may configure the whole GM space when ballooning is used */
Zhao, Xinda7c281352017-02-21 15:54:56 +08001919 if (!vgpu_gmadr_is_valid(vgpu, gma))
Zhi Wang2707e442016-03-28 23:23:16 +08001920 return 0;
Zhi Wang2707e442016-03-28 23:23:16 +08001921
1922 ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index);
1923
1924 memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
1925 bytes);
1926
1927 if (ops->test_present(&e)) {
Hang Yuancc753fb2017-12-22 18:06:31 +08001928 gfn = ops->get_pfn(&e);
1929
1930 /* one PTE update may be issued in multiple writes and the
1931 * first write may not construct a valid gfn
1932 */
1933 if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) {
1934 ops->set_pfn(&m, gvt->gtt.scratch_mfn);
1935 goto out;
1936 }
1937
Zhi Wang2707e442016-03-28 23:23:16 +08001938 ret = gtt_entry_p2m(vgpu, &e, &m);
1939 if (ret) {
Tina Zhang695fbc02017-03-10 04:26:53 -05001940 gvt_vgpu_err("fail to translate guest gtt entry\n");
Xiaoguang Chen359b6932017-03-21 10:54:21 +08001941			/* the guest driver may read/write the entry while partially
1942			 * updating it; p2m will fail in this situation, so set
1943			 * the shadow entry to point to a scratch page
1944 */
Zhi Wang22115ce2017-10-10 14:34:11 +08001945 ops->set_pfn(&m, gvt->gtt.scratch_mfn);
Zhi Wang2707e442016-03-28 23:23:16 +08001946 }
1947 } else {
1948 m = e;
Zhi Wang22115ce2017-10-10 14:34:11 +08001949 ops->set_pfn(&m, gvt->gtt.scratch_mfn);
Zhi Wang2707e442016-03-28 23:23:16 +08001950 }
1951
Hang Yuancc753fb2017-12-22 18:06:31 +08001952out:
Changbin Du3aff3512018-01-30 19:19:42 +08001953 ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
Chuanxiao Dongaf2c6392017-06-02 15:34:24 +08001954 gtt_invalidate(gvt->dev_priv);
Zhi Wang2707e442016-03-28 23:23:16 +08001955 ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
1956 return 0;
1957}
1958
1959/**
1960 * intel_vgpu_emulate_gtt_mmio_write - emulate GTT MMIO register write
1961 * @vgpu: a vGPU
1962 * @off: register offset
1963 * @p_data: data from guest write
1964 * @bytes: data length
1965 *
1966 * This function is used to emulate the GTT MMIO register write
1967 *
1968 * Returns:
1969 * Zero on success, error code if failed.
1970 */
1971int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
1972 void *p_data, unsigned int bytes)
1973{
1974 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1975 int ret;
1976
1977 if (bytes != 4 && bytes != 8)
1978 return -EINVAL;
1979
1980 off -= info->gtt_start_offset;
1981 ret = emulate_gtt_mmio_write(vgpu, off, p_data, bytes);
1982 return ret;
1983}
1984
Zhenyu Wang4fafba22017-12-18 11:58:46 +08001985int intel_vgpu_write_protect_handler(struct intel_vgpu *vgpu, u64 pa,
1986 void *p_data, unsigned int bytes)
1987{
1988 struct intel_gvt *gvt = vgpu->gvt;
1989 int ret = 0;
1990
1991 if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) {
1992 struct intel_vgpu_page_track *t;
1993
1994 mutex_lock(&gvt->lock);
1995
1996 t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT);
1997 if (t) {
1998 if (unlikely(vgpu->failsafe)) {
1999				/* remove write protection to prevent future traps */
2000 intel_vgpu_clean_page_track(vgpu, t);
2001 } else {
2002 ret = t->handler(t, pa, p_data, bytes);
2003 if (ret) {
2004 gvt_err("guest page write error %d, "
2005 "gfn 0x%lx, pa 0x%llx, "
2006 "var 0x%x, len %d\n",
2007 ret, t->gfn, pa,
2008 *(u32 *)p_data, bytes);
2009 }
2010 }
2011 }
2012 mutex_unlock(&gvt->lock);
2013 }
2014 return ret;
2015}
2016
2017
Ping Gao3b6411c2016-11-04 13:47:35 +08002018static int alloc_scratch_pages(struct intel_vgpu *vgpu,
2019 intel_gvt_gtt_type_t type)
Zhi Wang2707e442016-03-28 23:23:16 +08002020{
2021 struct intel_vgpu_gtt *gtt = &vgpu->gtt;
Ping Gao3b6411c2016-11-04 13:47:35 +08002022 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
Zhenyu Wang5c352582017-11-02 17:44:52 +08002023 int page_entry_num = I915_GTT_PAGE_SIZE >>
Ping Gao3b6411c2016-11-04 13:47:35 +08002024 vgpu->gvt->device_info.gtt_entry_size_shift;
Jike Song96317392017-01-09 15:38:38 +08002025 void *scratch_pt;
Ping Gao3b6411c2016-11-04 13:47:35 +08002026 int i;
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08002027 struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
2028 dma_addr_t daddr;
Zhi Wang2707e442016-03-28 23:23:16 +08002029
Ping Gao3b6411c2016-11-04 13:47:35 +08002030 if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
2031 return -EINVAL;
2032
Jike Song96317392017-01-09 15:38:38 +08002033 scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
Ping Gao3b6411c2016-11-04 13:47:35 +08002034 if (!scratch_pt) {
Tina Zhang695fbc02017-03-10 04:26:53 -05002035 gvt_vgpu_err("fail to allocate scratch page\n");
Zhi Wang2707e442016-03-28 23:23:16 +08002036 return -ENOMEM;
2037 }
2038
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08002039 daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0,
2040 4096, PCI_DMA_BIDIRECTIONAL);
2041 if (dma_mapping_error(dev, daddr)) {
Tina Zhang695fbc02017-03-10 04:26:53 -05002042 gvt_vgpu_err("fail to dmamap scratch_pt\n");
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08002043 __free_page(virt_to_page(scratch_pt));
2044 return -ENOMEM;
Ping Gao3b6411c2016-11-04 13:47:35 +08002045 }
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08002046 gtt->scratch_pt[type].page_mfn =
Zhenyu Wang5c352582017-11-02 17:44:52 +08002047 (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
Jike Song96317392017-01-09 15:38:38 +08002048 gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
Ping Gao3b6411c2016-11-04 13:47:35 +08002049 gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08002050 vgpu->id, type, gtt->scratch_pt[type].page_mfn);
Ping Gao3b6411c2016-11-04 13:47:35 +08002051
2052	/* Build the tree by filling the scratch pt with entries which
2053	 * point to the next level scratch pt or scratch page. The
2054	 * scratch_pt[type] indicates the scratch pt/scratch page used by the
2055	 * 'type' pt.
2056	 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by a
Jike Song96317392017-01-09 15:38:38 +08002057	 * GTT_TYPE_PPGTT_PDE_PT level pt; that means this scratch_pt itself
Ping Gao3b6411c2016-11-04 13:47:35 +08002058	 * is of type GTT_TYPE_PPGTT_PTE_PT and is filled with the scratch page mfn.
2059 */
2060 if (type > GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) {
2061 struct intel_gvt_gtt_entry se;
2062
2063 memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
2064 se.type = get_entry_type(type - 1);
2065 ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);
2066
2067		/* Entry parameters like present/writeable/cache type are
2068		 * set to the same values as in i915's scratch page tree.
2069 */
2070 se.val64 |= _PAGE_PRESENT | _PAGE_RW;
2071 if (type == GTT_TYPE_PPGTT_PDE_PT)
Zhi Wangc095b972017-09-14 20:39:41 +08002072 se.val64 |= PPAT_CACHED;
Ping Gao3b6411c2016-11-04 13:47:35 +08002073
2074 for (i = 0; i < page_entry_num; i++)
Jike Song96317392017-01-09 15:38:38 +08002075 ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
Zhi Wang2707e442016-03-28 23:23:16 +08002076 }
2077
Zhi Wang2707e442016-03-28 23:23:16 +08002078 return 0;
2079}
2080
Ping Gao3b6411c2016-11-04 13:47:35 +08002081static int release_scratch_page_tree(struct intel_vgpu *vgpu)
Zhi Wang2707e442016-03-28 23:23:16 +08002082{
Ping Gao3b6411c2016-11-04 13:47:35 +08002083 int i;
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08002084 struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
2085 dma_addr_t daddr;
Ping Gao3b6411c2016-11-04 13:47:35 +08002086
2087 for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
2088 if (vgpu->gtt.scratch_pt[i].page != NULL) {
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08002089 daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
Zhenyu Wang5c352582017-11-02 17:44:52 +08002090 I915_GTT_PAGE_SHIFT);
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08002091 dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
Ping Gao3b6411c2016-11-04 13:47:35 +08002092 __free_page(vgpu->gtt.scratch_pt[i].page);
2093 vgpu->gtt.scratch_pt[i].page = NULL;
2094 vgpu->gtt.scratch_pt[i].page_mfn = 0;
2095 }
Zhi Wang2707e442016-03-28 23:23:16 +08002096 }
Ping Gao3b6411c2016-11-04 13:47:35 +08002097
2098 return 0;
2099}
2100
2101static int create_scratch_page_tree(struct intel_vgpu *vgpu)
2102{
2103 int i, ret;
2104
2105 for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
2106 ret = alloc_scratch_pages(vgpu, i);
2107 if (ret)
2108 goto err;
2109 }
2110
2111 return 0;
2112
2113err:
2114 release_scratch_page_tree(vgpu);
2115 return ret;
Zhi Wang2707e442016-03-28 23:23:16 +08002116}
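/*
 * The resulting scratch hierarchy, one shared page per level; a guest
 * entry that is not present is shadowed by the scratch page of the
 * matching level:
 *
 *	scratch_pt[PML4_PT] entries -> scratch_pt[PDP_PT] page
 *	scratch_pt[PDP_PT]  entries -> scratch_pt[PDE_PT] page
 *	scratch_pt[PDE_PT]  entries -> scratch_pt[PTE_PT] page
 *	scratch_pt[PTE_PT]  page stays zeroed (no entry present)
 */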
2117
2118/**
2119 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
2120 * @vgpu: a vGPU
2121 *
2122 * This function is used to initialize per-vGPU graphics memory virtualization
2123 * components.
2124 *
2125 * Returns:
2126 * Zero on success, error code if failed.
2127 */
2128int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
2129{
2130 struct intel_vgpu_gtt *gtt = &vgpu->gtt;
Zhi Wang2707e442016-03-28 23:23:16 +08002131
Zhi Wang7d1e5cd2017-09-29 02:47:55 +08002132 hash_init(gtt->tracked_guest_page_hash_table);
Zhi Wang2707e442016-03-28 23:23:16 +08002133 hash_init(gtt->shadow_page_hash_table);
2134
Changbin Duede9d0c2018-01-30 19:19:40 +08002135 INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
Zhi Wang2707e442016-03-28 23:23:16 +08002136 INIT_LIST_HEAD(&gtt->oos_page_list_head);
2137 INIT_LIST_HEAD(&gtt->post_shadow_list_head);
2138
Changbin Duede9d0c2018-01-30 19:19:40 +08002139 gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
2140 if (IS_ERR(gtt->ggtt_mm)) {
Tina Zhang695fbc02017-03-10 04:26:53 -05002141 gvt_vgpu_err("fail to create mm for ggtt.\n");
Changbin Duede9d0c2018-01-30 19:19:40 +08002142 return PTR_ERR(gtt->ggtt_mm);
Zhi Wang2707e442016-03-28 23:23:16 +08002143 }
2144
Changbin Duede9d0c2018-01-30 19:19:40 +08002145 intel_vgpu_reset_ggtt(vgpu);
Zhi Wang2707e442016-03-28 23:23:16 +08002146
Ping Gao3b6411c2016-11-04 13:47:35 +08002147 return create_scratch_page_tree(vgpu);
Zhi Wang2707e442016-03-28 23:23:16 +08002148}
2149
Changbin Duede9d0c2018-01-30 19:19:40 +08002150static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
Ping Gaoda9cc8d2017-02-21 15:52:56 +08002151{
2152 struct list_head *pos, *n;
2153 struct intel_vgpu_mm *mm;
2154
Changbin Duede9d0c2018-01-30 19:19:40 +08002155 list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
2156 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
Changbin Du1bc25852018-01-30 19:19:41 +08002157 intel_vgpu_destroy_mm(mm);
Ping Gaoda9cc8d2017-02-21 15:52:56 +08002158 }
Changbin Duede9d0c2018-01-30 19:19:40 +08002159
2160 if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
2161		gvt_err("vgpu ppgtt mm is not fully destroyed\n");
2162
2163 if (GEM_WARN_ON(!hlist_empty(vgpu->gtt.shadow_page_hash_table))) {
2164		gvt_err("why do we still have unfreed spt pages?\n");
2165 ppgtt_free_all_shadow_page(vgpu);
2166 }
2167}
2168
2169static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
2170{
Changbin Du1bc25852018-01-30 19:19:41 +08002171 intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
Changbin Duede9d0c2018-01-30 19:19:40 +08002172 vgpu->gtt.ggtt_mm = NULL;
Ping Gaoda9cc8d2017-02-21 15:52:56 +08002173}
2174
Zhi Wang2707e442016-03-28 23:23:16 +08002175/**
2176 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
2177 * @vgpu: a vGPU
2178 *
2179 * This function is used to clean up per-vGPU graphics memory virtualization
2180 * components.
2184 */
2185void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
2186{
Changbin Duede9d0c2018-01-30 19:19:40 +08002187 intel_vgpu_destroy_all_ppgtt_mm(vgpu);
2188 intel_vgpu_destroy_ggtt_mm(vgpu);
Ping Gao3b6411c2016-11-04 13:47:35 +08002189 release_scratch_page_tree(vgpu);
Zhi Wang2707e442016-03-28 23:23:16 +08002190}
2191
2192static void clean_spt_oos(struct intel_gvt *gvt)
2193{
2194 struct intel_gvt_gtt *gtt = &gvt->gtt;
2195 struct list_head *pos, *n;
2196 struct intel_vgpu_oos_page *oos_page;
2197
2198 WARN(!list_empty(&gtt->oos_page_use_list_head),
2199 "someone is still using oos page\n");
2200
2201 list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
2202 oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
2203 list_del(&oos_page->list);
2204 kfree(oos_page);
2205 }
2206}
2207
2208static int setup_spt_oos(struct intel_gvt *gvt)
2209{
2210 struct intel_gvt_gtt *gtt = &gvt->gtt;
2211 struct intel_vgpu_oos_page *oos_page;
2212 int i;
2213 int ret;
2214
2215 INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
2216 INIT_LIST_HEAD(&gtt->oos_page_use_list_head);
2217
2218 for (i = 0; i < preallocated_oos_pages; i++) {
2219 oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
2220 if (!oos_page) {
Zhi Wang2707e442016-03-28 23:23:16 +08002221 ret = -ENOMEM;
2222 goto fail;
2223 }
2224
2225 INIT_LIST_HEAD(&oos_page->list);
2226 INIT_LIST_HEAD(&oos_page->vm_list);
2227 oos_page->id = i;
2228 list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
2229 }
2230
2231 gvt_dbg_mm("%d oos pages preallocated\n", i);
2232
2233 return 0;
2234fail:
2235 clean_spt_oos(gvt);
2236 return ret;
2237}
2238
2239/**
2240 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
2241 * @vgpu: a vGPU
2242 * @pdps: guest pdps (PPGTT page table root pointers)
2244 *
2245 * This function is used to find a PPGTT mm object from mm object pool
2246 *
2247 * Returns:
2248 * pointer to mm object on success, NULL if failed.
2249 */
2250struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
Changbin Duede9d0c2018-01-30 19:19:40 +08002251 u64 pdps[])
Zhi Wang2707e442016-03-28 23:23:16 +08002252{
Zhi Wang2707e442016-03-28 23:23:16 +08002253 struct intel_vgpu_mm *mm;
Changbin Duede9d0c2018-01-30 19:19:40 +08002254 struct list_head *pos;
Zhi Wang2707e442016-03-28 23:23:16 +08002255
Changbin Duede9d0c2018-01-30 19:19:40 +08002256 list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
2257 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
Zhi Wang2707e442016-03-28 23:23:16 +08002258
Changbin Duede9d0c2018-01-30 19:19:40 +08002259 switch (mm->ppgtt_mm.root_entry_type) {
2260 case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
2261 if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
Zhi Wang2707e442016-03-28 23:23:16 +08002262 return mm;
Changbin Duede9d0c2018-01-30 19:19:40 +08002263 break;
2264 case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
2265 if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
2266 sizeof(mm->ppgtt_mm.guest_pdps)))
Zhi Wang2707e442016-03-28 23:23:16 +08002267 return mm;
Changbin Duede9d0c2018-01-30 19:19:40 +08002268 break;
2269 default:
2270 GEM_BUG_ON(1);
Zhi Wang2707e442016-03-28 23:23:16 +08002271 }
2272 }
2273 return NULL;
2274}
2275
2276/**
2277 * intel_vgpu_g2v_create_ppgtt_mm - create a PPGTT mm object from
2278 * g2v notification
2279 * @vgpu: a vGPU
Changbin Duede9d0c2018-01-30 19:19:40 +08002280 * @root_entry_type: ppgtt root entry type
2281 * @pdps: guest pdps
Zhi Wang2707e442016-03-28 23:23:16 +08002282 *
2283 * This function is used to create a PPGTT mm object from a guest to GVT-g
2284 * notification.
2285 *
2286 * Returns:
2287 * Zero on success, negative error code if failed.
2288 */
2289int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu,
Changbin Duede9d0c2018-01-30 19:19:40 +08002290 intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
Zhi Wang2707e442016-03-28 23:23:16 +08002291{
Zhi Wang2707e442016-03-28 23:23:16 +08002292 struct intel_vgpu_mm *mm;
2293
Changbin Duede9d0c2018-01-30 19:19:40 +08002294 mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
Zhi Wang2707e442016-03-28 23:23:16 +08002295 if (mm) {
Changbin Du1bc25852018-01-30 19:19:41 +08002296 intel_vgpu_mm_get(mm);
Zhi Wang2707e442016-03-28 23:23:16 +08002297 } else {
Changbin Duede9d0c2018-01-30 19:19:40 +08002298 mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
Zhi Wang2707e442016-03-28 23:23:16 +08002299 if (IS_ERR(mm)) {
Tina Zhang695fbc02017-03-10 04:26:53 -05002300 gvt_vgpu_err("fail to create mm\n");
Zhi Wang2707e442016-03-28 23:23:16 +08002301 return PTR_ERR(mm);
2302 }
2303 }
2304 return 0;
2305}
2306
2307/**
2308 * intel_vgpu_g2v_destroy_ppgtt_mm - destroy a PPGTT mm object from
2309 * g2v notification
2310 * @vgpu: a vGPU
Changbin Duede9d0c2018-01-30 19:19:40 +08002311 * @pdps: guest pdps
Zhi Wang2707e442016-03-28 23:23:16 +08002312 *
2313 * This function is used to destroy a PPGTT mm object from a guest to GVT-g
2314 * notification.
2315 *
2316 * Returns:
2317 * Zero on success, negative error code if failed.
2318 */
2319int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu,
Changbin Duede9d0c2018-01-30 19:19:40 +08002320 u64 pdps[])
Zhi Wang2707e442016-03-28 23:23:16 +08002321{
Zhi Wang2707e442016-03-28 23:23:16 +08002322 struct intel_vgpu_mm *mm;
2323
Changbin Duede9d0c2018-01-30 19:19:40 +08002324 mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
Zhi Wang2707e442016-03-28 23:23:16 +08002325 if (!mm) {
Tina Zhang695fbc02017-03-10 04:26:53 -05002326 gvt_vgpu_err("fail to find ppgtt instance.\n");
Zhi Wang2707e442016-03-28 23:23:16 +08002327 return -EINVAL;
2328 }
Changbin Du1bc25852018-01-30 19:19:41 +08002329 intel_vgpu_mm_put(mm);
Zhi Wang2707e442016-03-28 23:23:16 +08002330 return 0;
2331}
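/*
 * Lifecycle note: the g2v create notification above either takes an
 * extra reference on an existing mm (intel_vgpu_mm_get) or creates one
 * holding the initial reference; the destroy notification drops a
 * reference (intel_vgpu_mm_put), and _intel_vgpu_mm_release() frees the
 * mm once the last reference is gone.
 */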
2332
2333/**
2334 * intel_gvt_init_gtt - initialize mm components of a GVT device
2335 * @gvt: GVT device
2336 *
2337 * This function is called at the initialization stage, to initialize
2338 * the mm components of a GVT device.
2339 *
2340 * Returns:
2341 * zero on success, negative error code if failed.
2342 */
2343int intel_gvt_init_gtt(struct intel_gvt *gvt)
2344{
2345 int ret;
Jike Song96317392017-01-09 15:38:38 +08002346 void *page;
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08002347 struct device *dev = &gvt->dev_priv->drm.pdev->dev;
2348 dma_addr_t daddr;
Zhi Wang2707e442016-03-28 23:23:16 +08002349
2350 gvt_dbg_core("init gtt\n");
2351
Xu Hane3476c02017-03-29 10:13:59 +08002352 if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
2353 || IS_KABYLAKE(gvt->dev_priv)) {
Zhi Wang2707e442016-03-28 23:23:16 +08002354 gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
2355 gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
Zhi Wang2707e442016-03-28 23:23:16 +08002356 } else {
2357 return -ENODEV;
2358 }
2359
Jike Song96317392017-01-09 15:38:38 +08002360 page = (void *)get_zeroed_page(GFP_KERNEL);
2361 if (!page) {
Ping Gaod650ac02016-12-08 10:14:48 +08002362 gvt_err("fail to allocate scratch ggtt page\n");
2363 return -ENOMEM;
2364 }
2365
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08002366 daddr = dma_map_page(dev, virt_to_page(page), 0,
2367 4096, PCI_DMA_BIDIRECTIONAL);
2368 if (dma_mapping_error(dev, daddr)) {
2369 gvt_err("fail to dmamap scratch ggtt page\n");
2370 __free_page(virt_to_page(page));
2371 return -ENOMEM;
Ping Gaod650ac02016-12-08 10:14:48 +08002372 }
Zhi Wang22115ce2017-10-10 14:34:11 +08002373
2374 gvt->gtt.scratch_page = virt_to_page(page);
2375 gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
Ping Gaod650ac02016-12-08 10:14:48 +08002376
Zhi Wang2707e442016-03-28 23:23:16 +08002377 if (enable_out_of_sync) {
2378 ret = setup_spt_oos(gvt);
2379 if (ret) {
2380 gvt_err("fail to initialize SPT oos\n");
Zhou, Wenjia0de98702017-07-04 15:47:00 +08002381 dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
Zhi Wang22115ce2017-10-10 14:34:11 +08002382 __free_page(gvt->gtt.scratch_page);
Zhi Wang2707e442016-03-28 23:23:16 +08002383 return ret;
2384 }
2385 }
Changbin Duede9d0c2018-01-30 19:19:40 +08002386 INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
Zhi Wang2707e442016-03-28 23:23:16 +08002387 return 0;
2388}
2389
2390/**
2391 * intel_gvt_clean_gtt - clean up mm components of a GVT device
2392 * @gvt: GVT device
2393 *
2394 * This function is called at the driver unloading stage, to clean up
2395 * the mm components of a GVT device.
2396 *
2397 */
2398void intel_gvt_clean_gtt(struct intel_gvt *gvt)
2399{
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08002400 struct device *dev = &gvt->dev_priv->drm.pdev->dev;
Zhi Wang22115ce2017-10-10 14:34:11 +08002401 dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn <<
Zhi Wang9556e112017-10-10 13:51:32 +08002402 I915_GTT_PAGE_SHIFT);
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08002403
2404 dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
2405
Zhi Wang22115ce2017-10-10 14:34:11 +08002406 __free_page(gvt->gtt.scratch_page);
Ping Gaod650ac02016-12-08 10:14:48 +08002407
Zhi Wang2707e442016-03-28 23:23:16 +08002408 if (enable_out_of_sync)
2409 clean_spt_oos(gvt);
2410}
Ping Gaod650ac02016-12-08 10:14:48 +08002411
2412/**
2413 * intel_vgpu_reset_ggtt - reset the GGTT entry
2414 * @vgpu: a vGPU
2415 *
2416 * This function is called at the vGPU create stage
2417 * to reset all the GGTT entries.
2418 *
2419 */
2420void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
2421{
2422 struct intel_gvt *gvt = vgpu->gvt;
Zhenyu Wang5ad59bf2017-04-12 16:24:57 +08002423 struct drm_i915_private *dev_priv = gvt->dev_priv;
Changbin Dub0c766b2018-01-30 19:19:43 +08002424 struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
2425 struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE};
Ping Gaod650ac02016-12-08 10:14:48 +08002426 u32 index;
Ping Gaod650ac02016-12-08 10:14:48 +08002427 u32 num_entries;
Ping Gaod650ac02016-12-08 10:14:48 +08002428
Changbin Dub0c766b2018-01-30 19:19:43 +08002429 pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn);
2430 pte_ops->set_present(&entry);
Ping Gaod650ac02016-12-08 10:14:48 +08002431
2432 index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
2433 num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
Changbin Dub0c766b2018-01-30 19:19:43 +08002434 while (num_entries--)
2435 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
Ping Gaod650ac02016-12-08 10:14:48 +08002436
2437 index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
2438 num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
Changbin Dub0c766b2018-01-30 19:19:43 +08002439 while (num_entries--)
2440 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
Zhenyu Wang5ad59bf2017-04-12 16:24:57 +08002441
Chuanxiao Dongaf2c6392017-06-02 15:34:24 +08002442 gtt_invalidate(dev_priv);
Ping Gaod650ac02016-12-08 10:14:48 +08002443}
Changbin Dub6115812017-01-13 11:15:57 +08002444
2445/**
2446 * intel_vgpu_reset_gtt - reset all GTT related status
2447 * @vgpu: a vGPU
Changbin Dub6115812017-01-13 11:15:57 +08002448 *
2449 * This function is called from vfio core to reset all
2450 * GTT related status, including GGTT, PPGTT, scratch page.
2451 *
2452 */
Chuanxiao Dong4d3e67b2017-08-04 13:08:59 +08002453void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
Changbin Dub6115812017-01-13 11:15:57 +08002454{
Ping Gaoda9cc8d2017-02-21 15:52:56 +08002455 /* Shadow pages are only created when there is no page
2456 * table tracking data, so remove page tracking data after
2457 * removing the shadow pages.
2458 */
Changbin Duede9d0c2018-01-30 19:19:40 +08002459 intel_vgpu_destroy_all_ppgtt_mm(vgpu);
Changbin Dub6115812017-01-13 11:15:57 +08002460 intel_vgpu_reset_ggtt(vgpu);
Changbin Dub6115812017-01-13 11:15:57 +08002461}