/*
 * GTT virtualization
 *
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhi Wang <zhi.a.wang@intel.com>
 *    Zhenyu Wang <zhenyuw@linux.intel.com>
 *    Xiao Zheng <xiao.zheng@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"
#include "i915_pvinfo.h"
#include "trace.h"

#if defined(VERBOSE_DEBUG)
#define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args)
#else
#define gvt_vdbg_mm(fmt, args...)
#endif

static bool enable_out_of_sync = false;
static int preallocated_oos_pages = 8192;

/*
 * Validate a GM address and its range size.
 */
bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
{
	if ((!vgpu_gmadr_is_valid(vgpu, addr)) || (size
			&& !vgpu_gmadr_is_valid(vgpu, addr + size - 1))) {
		gvt_vgpu_err("invalid range gmadr 0x%llx size 0x%x\n",
				addr, size);
		return false;
	}
	return true;
}

/* translate a guest gmadr to host gmadr */
int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr)
{
	if (WARN(!vgpu_gmadr_is_valid(vgpu, g_addr),
		 "invalid guest gmadr %llx\n", g_addr))
		return -EACCES;

	if (vgpu_gmadr_is_aperture(vgpu, g_addr))
		*h_addr = vgpu_aperture_gmadr_base(vgpu)
			  + (g_addr - vgpu_aperture_offset(vgpu));
	else
		*h_addr = vgpu_hidden_gmadr_base(vgpu)
			  + (g_addr - vgpu_hidden_offset(vgpu));
	return 0;
}

/* translate a host gmadr to guest gmadr */
int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr)
{
	if (WARN(!gvt_gmadr_is_valid(vgpu->gvt, h_addr),
		 "invalid host gmadr %llx\n", h_addr))
		return -EACCES;

	if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr))
		*g_addr = vgpu_aperture_gmadr_base(vgpu)
			  + (h_addr - gvt_aperture_gmadr_base(vgpu->gvt));
	else
		*g_addr = vgpu_hidden_gmadr_base(vgpu)
			  + (h_addr - gvt_hidden_gmadr_base(vgpu->gvt));
	return 0;
}

int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
			     unsigned long *h_index)
{
	u64 h_addr;
	int ret;

	ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << I915_GTT_PAGE_SHIFT,
				       &h_addr);
	if (ret)
		return ret;

	*h_index = h_addr >> I915_GTT_PAGE_SHIFT;
	return 0;
}

int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
			     unsigned long *g_index)
{
	u64 g_addr;
	int ret;

	ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << I915_GTT_PAGE_SHIFT,
				       &g_addr);
	if (ret)
		return ret;

	*g_index = g_addr >> I915_GTT_PAGE_SHIFT;
	return 0;
}

#define gtt_type_is_entry(type) \
	(type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
	 && type != GTT_TYPE_PPGTT_PTE_ENTRY \
	 && type != GTT_TYPE_PPGTT_ROOT_ENTRY)

#define gtt_type_is_pt(type) \
	(type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)

#define gtt_type_is_pte_pt(type) \
	(type == GTT_TYPE_PPGTT_PTE_PT)

#define gtt_type_is_root_pointer(type) \
	(gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)

#define gtt_init_entry(e, t, p, v) do { \
	(e)->type = t; \
	(e)->pdev = p; \
	memcpy(&(e)->val64, &v, sizeof(v)); \
} while (0)

/*
 * Mappings between GTT_TYPE* enumerations.
 * The following information can be looked up for a given type:
 * - the type of the next-level page table
 * - the type of an entry inside this level of page table
 * - the type of the entry when its PSE bit is set
 *
 * When the given type doesn't carry a particular piece of information,
 * GTT_TYPE_INVALID is returned. For example, asking for the PSE type of
 * an L4 root entry, or for the next-level page table type of a PTE page
 * table, yields GTT_TYPE_INVALID, since an L4 root entry has no PSE bit
 * and a PTE page table has no next level. This is useful when traversing
 * a page table.
 */

struct gtt_type_table_entry {
	int entry_type;
	int pt_type;
	int next_pt_type;
	int pse_entry_type;
};

#define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \
	[type] = { \
		.entry_type = e_type, \
		.pt_type = cpt_type, \
		.next_pt_type = npt_type, \
		.pse_entry_type = pse_type, \
	}

static struct gtt_type_table_entry gtt_type_table[] = {
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
			GTT_TYPE_GGTT_PTE,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
};
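
/*
 * Example of how the table above is meant to be read (illustrative
 * only): starting from a PDE page table, get_next_pt_type(
 * GTT_TYPE_PPGTT_PDE_PT) yields GTT_TYPE_PPGTT_PTE_PT, the entries it
 * holds are of type GTT_TYPE_PPGTT_PDE_ENTRY, and such an entry with
 * the PSE bit set becomes GTT_TYPE_PPGTT_PTE_2M_ENTRY, which maps a
 * 2MB page directly and therefore has no next-level table.
 */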

static inline int get_next_pt_type(int type)
{
	return gtt_type_table[type].next_pt_type;
}

static inline int get_pt_type(int type)
{
	return gtt_type_table[type].pt_type;
}

static inline int get_entry_type(int type)
{
	return gtt_type_table[type].entry_type;
}

static inline int get_pse_type(int type)
{
	return gtt_type_table[type].pse_entry_type;
}

static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index)
{
	void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;

	return readq(addr);
}

static void ggtt_invalidate(struct drm_i915_private *dev_priv)
{
	mmio_hw_access_pre(dev_priv);
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	mmio_hw_access_post(dev_priv);
}

static void write_pte64(struct drm_i915_private *dev_priv,
		unsigned long index, u64 pte)
{
	void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;

	writeq(pte, addr);
}

static inline int gtt_get_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (WARN_ON(info->gtt_entry_size != 8))
		return -EINVAL;

	if (hypervisor_access) {
		ret = intel_gvt_hypervisor_read_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
		if (WARN_ON(ret))
			return ret;
	} else if (!pt) {
		e->val64 = read_pte64(vgpu->gvt->dev_priv, index);
	} else {
		e->val64 = *((u64 *)pt + index);
	}
	return 0;
}

static inline int gtt_set_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (WARN_ON(info->gtt_entry_size != 8))
		return -EINVAL;

	if (hypervisor_access) {
		ret = intel_gvt_hypervisor_write_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
		if (WARN_ON(ret))
			return ret;
	} else if (!pt) {
		write_pte64(vgpu->gvt->dev_priv, index, e->val64);
	} else {
		*((u64 *)pt + index) = e->val64;
	}
	return 0;
}
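
/*
 * Note: the two helpers above dispatch on their arguments. With
 * hypervisor_access set they go through guest physical memory at @gpa;
 * with a NULL @pt they touch the real GGTT through read_pte64()/
 * write_pte64(); otherwise @pt is the CPU virtual address of a (shadow)
 * page table page and the entry is accessed directly.
 */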

#define GTT_HAW 46

#define ADDR_1G_MASK GENMASK_ULL(GTT_HAW - 1, 30)
#define ADDR_2M_MASK GENMASK_ULL(GTT_HAW - 1, 21)
#define ADDR_4K_MASK GENMASK_ULL(GTT_HAW - 1, 12)
344static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
345{
346 unsigned long pfn;
347
348 if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
Changbin Dud861ca22018-01-30 19:19:47 +0800349 pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT;
Zhi Wang2707e442016-03-28 23:23:16 +0800350 else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
Changbin Dud861ca22018-01-30 19:19:47 +0800351 pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT;
Zhi Wang2707e442016-03-28 23:23:16 +0800352 else
Changbin Dud861ca22018-01-30 19:19:47 +0800353 pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT;
Zhi Wang2707e442016-03-28 23:23:16 +0800354 return pfn;
355}
356
357static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
358{
359 if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
360 e->val64 &= ~ADDR_1G_MASK;
Changbin Dud861ca22018-01-30 19:19:47 +0800361 pfn &= (ADDR_1G_MASK >> PAGE_SHIFT);
Zhi Wang2707e442016-03-28 23:23:16 +0800362 } else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
363 e->val64 &= ~ADDR_2M_MASK;
Changbin Dud861ca22018-01-30 19:19:47 +0800364 pfn &= (ADDR_2M_MASK >> PAGE_SHIFT);
Zhi Wang2707e442016-03-28 23:23:16 +0800365 } else {
366 e->val64 &= ~ADDR_4K_MASK;
Changbin Dud861ca22018-01-30 19:19:47 +0800367 pfn &= (ADDR_4K_MASK >> PAGE_SHIFT);
Zhi Wang2707e442016-03-28 23:23:16 +0800368 }
369
Changbin Dud861ca22018-01-30 19:19:47 +0800370 e->val64 |= (pfn << PAGE_SHIFT);
Zhi Wang2707e442016-03-28 23:23:16 +0800371}
372
373static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
374{
375 /* Entry doesn't have PSE bit. */
376 if (get_pse_type(e->type) == GTT_TYPE_INVALID)
377 return false;
378
379 e->type = get_entry_type(e->type);
Changbin Dud861ca22018-01-30 19:19:47 +0800380 if (!(e->val64 & _PAGE_PSE))
Zhi Wang2707e442016-03-28 23:23:16 +0800381 return false;
382
383 e->type = get_pse_type(e->type);
384 return true;
385}

static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
{
	/*
	 * i915 writes the PDP root pointer registers without the present
	 * bit set, and that still works, so root pointer entries need to
	 * be treated specially.
	 */
	if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
			|| e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
		return (e->val64 != 0);
	else
		return (e->val64 & _PAGE_PRESENT);
}

static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
{
	e->val64 &= ~_PAGE_PRESENT;
}

static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
{
	e->val64 |= _PAGE_PRESENT;
}

/*
 * Per-platform GMA routines.
 */
static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
{
	unsigned long x = (gma >> I915_GTT_PAGE_SHIFT);

	trace_gma_index(__func__, gma, x);
	return x;
}

#define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
{ \
	unsigned long x = (exp); \
	trace_gma_index(__func__, gma, x); \
	return x; \
}

DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));

static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
	.get_entry = gtt_get_entry64,
	.set_entry = gtt_set_entry64,
	.clear_present = gtt_entry_clear_present,
	.set_present = gtt_entry_set_present,
	.test_present = gen8_gtt_test_present,
	.test_pse = gen8_gtt_test_pse,
	.get_pfn = gen8_gtt_get_pfn,
	.set_pfn = gen8_gtt_set_pfn,
};

static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
	.gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
	.gma_to_pte_index = gen8_gma_to_pte_index,
	.gma_to_pde_index = gen8_gma_to_pde_index,
	.gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
	.gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
	.gma_to_pml4_index = gen8_gma_to_pml4_index,
};
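
/*
 * Illustration of how a gen8 graphics memory address (GMA) is decoded
 * by the helpers above: for a 4-level PPGTT, bits 47:39 index the PML4,
 * bits 38:30 the PDP, bits 29:21 the PDE and bits 20:12 the PTE, with
 * bits 11:0 being the offset inside the 4KB page. A 3-level PPGTT uses
 * only bits 31:30 for its 4-entry PDP root.
 */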

/*
 * MM helpers.
 */
static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index,
		bool guest)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT);

	entry->type = mm->ppgtt_mm.root_entry_type;
	pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps :
			   mm->ppgtt_mm.shadow_pdps,
			   entry, index, false, 0, mm->vgpu);

	pte_ops->test_pse(entry);
}

static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_get_root_entry(mm, entry, index, true);
}

static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_get_root_entry(mm, entry, index, false);
}

static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index,
		bool guest)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps :
			   mm->ppgtt_mm.shadow_pdps,
			   entry, index, false, 0, mm->vgpu);
}

static inline void ppgtt_set_guest_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_set_root_entry(mm, entry, index, true);
}

static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_set_root_entry(mm, entry, index, false);
}

static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	entry->type = GTT_TYPE_GGTT_PTE;
	pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
			   false, 0, mm->vgpu);
}

static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
			   false, 0, mm->vgpu);
}

static void ggtt_set_host_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu);
}

/*
 * PPGTT shadow page table helpers.
 */
static inline int ppgtt_spt_get_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	int ret;

	e->type = get_entry_type(type);

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return -EINVAL;

	ret = ops->get_entry(page_table, e, index, guest,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			spt->vgpu);
	if (ret)
		return ret;

	ops->test_pse(e);

	gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
		    type, e->type, index, e->val64);
	return 0;
}

static inline int ppgtt_spt_set_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return -EINVAL;

	gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
		    type, e->type, index, e->val64);

	return ops->set_entry(page_table, e, index, guest,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			spt->vgpu);
}

#define ppgtt_get_guest_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, NULL, \
		spt->guest_page.type, e, index, true)

#define ppgtt_set_guest_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, NULL, \
		spt->guest_page.type, e, index, true)

#define ppgtt_get_shadow_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)

#define ppgtt_set_shadow_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)

static void *alloc_spt(gfp_t gfp_mask)
{
	struct intel_vgpu_ppgtt_spt *spt;

	spt = kzalloc(sizeof(*spt), gfp_mask);
	if (!spt)
		return NULL;

	spt->shadow_page.page = alloc_page(gfp_mask);
	if (!spt->shadow_page.page) {
		kfree(spt);
		return NULL;
	}
	return spt;
}

static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	__free_page(spt->shadow_page.page);
	kfree(spt);
}

static int detach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page);

static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct device *kdev = &spt->vgpu->gvt->dev_priv->drm.pdev->dev;

	trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type);

	dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096,
		       PCI_DMA_BIDIRECTIONAL);

	radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);

	if (spt->guest_page.oos_page)
		detach_oos_page(spt->vgpu, spt->guest_page.oos_page);

	intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);

	list_del_init(&spt->post_shadow_list);
	free_spt(spt);
}

static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_ppgtt_spt *spt;
	struct radix_tree_iter iter;
	void **slot;

	radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) {
		spt = radix_tree_deref_slot(slot);
		ppgtt_free_spt(spt);
	}
}

static int ppgtt_handle_guest_write_page_table_bytes(
		struct intel_vgpu_ppgtt_spt *spt,
		u64 pa, void *p_data, int bytes);

static int ppgtt_write_protection_handler(
		struct intel_vgpu_page_track *page_track,
		u64 gpa, void *data, int bytes)
{
	struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	return ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data,
							 bytes);
}

/* Find a spt by guest gfn. */
static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn(
		struct intel_vgpu *vgpu, unsigned long gfn)
{
	struct intel_vgpu_page_track *track;

	track = intel_vgpu_find_page_track(vgpu, gfn);
	if (track && track->handler == ppgtt_write_protection_handler)
		return track->priv_data;

	return NULL;
}

/* Find the spt by shadow page mfn. */
static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
		struct intel_vgpu *vgpu, unsigned long mfn)
{
	return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn);
}

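/*
 * A shadow page table page is thus reachable through two keys: the gfn
 * of the guest page it shadows, via the page track facility, and the
 * mfn of its shadow page, via the per-vGPU radix tree.
 */
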
static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);

static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
		struct intel_vgpu *vgpu, int type, unsigned long gfn)
{
	struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	dma_addr_t daddr;
	int ret;

retry:
	spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
	if (!spt) {
		if (reclaim_one_ppgtt_mm(vgpu->gvt))
			goto retry;

		gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
		return ERR_PTR(-ENOMEM);
	}

	spt->vgpu = vgpu;
	atomic_set(&spt->refcount, 1);
	INIT_LIST_HEAD(&spt->post_shadow_list);

	/*
	 * Init shadow_page.
	 */
	spt->shadow_page.type = type;
	daddr = dma_map_page(kdev, spt->shadow_page.page,
			     0, 4096, PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(kdev, daddr)) {
		gvt_vgpu_err("fail to map dma addr\n");
		ret = -EINVAL;
		goto err_free_spt;
	}
	spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
	spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;

	/*
	 * Init guest_page.
	 */
	spt->guest_page.type = type;
	spt->guest_page.gfn = gfn;

	ret = intel_vgpu_register_page_track(vgpu, spt->guest_page.gfn,
					ppgtt_write_protection_handler, spt);
	if (ret)
		goto err_unmap_dma;

	ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
	if (ret)
		goto err_unreg_page_track;

	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
	return spt;

err_unreg_page_track:
	intel_vgpu_unregister_page_track(vgpu, spt->guest_page.gfn);
err_unmap_dma:
	dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
err_free_spt:
	free_spt(spt);
	return ERR_PTR(ret);
}
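
/*
 * The allocation above mirrors the teardown in ppgtt_free_spt(): the
 * shadow page is DMA-mapped so the GPU can walk it, the guest page is
 * write-protected through the page track facility, and the spt is
 * indexed by its shadow mfn in the radix tree; each error label unwinds
 * exactly the steps that succeeded before it.
 */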

#define pt_entry_size_shift(spt) \
	((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)

#define pt_entries(spt) \
	(I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))

#define for_each_present_guest_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); i++) \
		if (!ppgtt_get_guest_entry(spt, e, i) && \
		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))

#define for_each_present_shadow_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); i++) \
		if (!ppgtt_get_shadow_entry(spt, e, i) && \
		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))

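/*
 * The iterators above visit every slot of a page table page (512 slots
 * for 8-byte entries in a 4KB page) but run their body only for entries
 * that were read back successfully and have the present bit set.
 */
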
static void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	int v = atomic_read(&spt->refcount);

	trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));

	atomic_inc(&spt->refcount);
}

static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);

static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
		struct intel_gvt_gtt_entry *e)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;
	intel_gvt_gtt_type_t cur_pt_type;

	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));

	if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
		&& e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
		cur_pt_type = get_next_pt_type(e->type) + 1;
		if (ops->get_pfn(e) ==
			vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
			return 0;
	}
	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
	if (!s) {
		gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
				ops->get_pfn(e));
		return -ENXIO;
	}
	return ppgtt_invalidate_spt(s);
}

static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry e;
	unsigned long index;
	int ret;
	int v = atomic_read(&spt->refcount);

	trace_spt_change(spt->vgpu->id, "die", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);

	trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));

	if (atomic_dec_return(&spt->refcount) > 0)
		return 0;

	if (gtt_type_is_pte_pt(spt->shadow_page.type))
		goto release;

	for_each_present_shadow_entry(spt, &e, index) {
		switch (e.type) {
		case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
			gvt_vdbg_mm("invalidate 4K entry\n");
			continue;
		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
			WARN(1, "GVT doesn't support 2M/1GB page\n");
			continue;
		case GTT_TYPE_PPGTT_PML4_ENTRY:
		case GTT_TYPE_PPGTT_PDP_ENTRY:
		case GTT_TYPE_PPGTT_PDE_ENTRY:
			gvt_vdbg_mm("invalidate PML4/PDP/PDE entry\n");
			ret = ppgtt_invalidate_spt_by_shadow_entry(
					spt->vgpu, &e);
			if (ret)
				goto fail;
			break;
		default:
			GEM_BUG_ON(1);
		}
	}
release:
	trace_spt_change(spt->vgpu->id, "release", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);
	ppgtt_free_spt(spt);
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
			spt, e.val64, e.type);
	return ret;
}
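
/*
 * Invalidation is recursive: dropping the last reference to a non-leaf
 * shadow page walks its present entries and invalidates the shadow
 * pages they point to before the page itself is freed, so a whole
 * shadow tree can be torn down starting from its root entry.
 */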

static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt);

static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
		struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	int ret;

	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));

	spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
	if (spt)
		ppgtt_get_spt(spt);
	else {
		int type = get_next_pt_type(we->type);

		spt = ppgtt_alloc_spt(vgpu, type, ops->get_pfn(we));
		if (IS_ERR(spt)) {
			ret = PTR_ERR(spt);
			goto fail;
		}

		ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
		if (ret)
			goto fail;

		ret = ppgtt_populate_spt(spt);
		if (ret)
			goto fail;

		trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
				 spt->shadow_page.type);
	}
	return spt;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
		     spt, we->val64, we->type);
	return ERR_PTR(ret);
}

static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
		struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
{
	struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;

	se->type = ge->type;
	se->val64 = ge->val64;

	ops->set_pfn(se, s->shadow_page.mfn);
}

static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
	struct intel_gvt_gtt_entry *ge)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry se = *ge;
	unsigned long gfn, mfn;

	if (!pte_ops->test_present(ge))
		return 0;

	gfn = pte_ops->get_pfn(ge);

	switch (ge->type) {
	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
		gvt_vdbg_mm("shadow 4K gtt entry\n");
		break;
	case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
	case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
		gvt_vgpu_err("GVT doesn't support 2M/1GB entry\n");
		return -EINVAL;
	default:
		GEM_BUG_ON(1);
	}

	/* direct shadow */
	mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn);
	if (mfn == INTEL_GVT_INVALID_ADDR)
		return -ENXIO;

	pte_ops->set_pfn(&se, mfn);
	ppgtt_set_shadow_entry(spt, &se, index);
	return 0;
}

static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;
	struct intel_gvt_gtt_entry se, ge;
	unsigned long gfn, i;
	int ret;

	trace_spt_change(spt->vgpu->id, "born", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);

	for_each_present_guest_entry(spt, &ge, i) {
		if (gtt_type_is_pt(get_next_pt_type(ge.type))) {
			s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
			if (IS_ERR(s)) {
				ret = PTR_ERR(s);
				goto fail;
			}
			ppgtt_get_shadow_entry(spt, &se, i);
			ppgtt_generate_shadow_entry(&se, s, &ge);
			ppgtt_set_shadow_entry(spt, &se, i);
		} else {
			gfn = ops->get_pfn(&ge);
			if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) {
				ops->set_pfn(&se, gvt->gtt.scratch_mfn);
				ppgtt_set_shadow_entry(spt, &se, i);
				continue;
			}

			ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
			if (ret)
				goto fail;
		}
	}
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
			spt, ge.val64, ge.type);
	return ret;
}

static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *se, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	int ret;

	trace_spt_guest_change(spt->vgpu->id, "remove", spt,
			       spt->shadow_page.type, se->val64, index);

	gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n",
		    se->type, index, se->val64);

	if (!ops->test_present(se))
		return 0;

	if (ops->get_pfn(se) ==
	    vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn)
		return 0;

	if (gtt_type_is_pt(get_next_pt_type(se->type))) {
		struct intel_vgpu_ppgtt_spt *s =
			intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se));
		if (!s) {
			gvt_vgpu_err("fail to find guest page\n");
			ret = -ENXIO;
			goto fail;
		}
		ret = ppgtt_invalidate_spt(s);
		if (ret)
			goto fail;
	}
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
			spt, se->val64, se->type);
	return ret;
}

static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry m;
	struct intel_vgpu_ppgtt_spt *s;
	int ret;

	trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type,
			       we->val64, index);

	gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n",
		    we->type, index, we->val64);

	if (gtt_type_is_pt(get_next_pt_type(we->type))) {
		s = ppgtt_populate_spt_by_guest_entry(vgpu, we);
		if (IS_ERR(s)) {
			ret = PTR_ERR(s);
			goto fail;
		}
		ppgtt_get_shadow_entry(spt, &m, index);
		ppgtt_generate_shadow_entry(&m, s, we);
		ppgtt_set_shadow_entry(spt, &m, index);
	} else {
		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we);
		if (ret)
			goto fail;
	}
	return 0;
fail:
	gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
		spt, we->val64, we->type);
	return ret;
}

static int sync_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
	struct intel_gvt_gtt_entry old, new;
	int index;
	int ret;

	trace_oos_change(vgpu->id, "sync", oos_page->id,
			 spt, spt->guest_page.type);

	old.type = new.type = get_entry_type(spt->guest_page.type);
	old.val64 = new.val64 = 0;

	for (index = 0; index < (I915_GTT_PAGE_SIZE >>
				info->gtt_entry_size_shift); index++) {
		ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
		ops->get_entry(NULL, &new, index, true,
			       spt->guest_page.gfn << PAGE_SHIFT, vgpu);

		if (old.val64 == new.val64
			&& !test_and_clear_bit(index, spt->post_shadow_bitmap))
			continue;

		trace_oos_sync(vgpu->id, oos_page->id,
				spt, spt->guest_page.type,
				new.val64, index);

		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new);
		if (ret)
			return ret;

		ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
	}

	spt->guest_page.write_cnt = 0;
	list_del_init(&spt->post_shadow_list);
	return 0;
}

static int detach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;

	trace_oos_change(vgpu->id, "detach", oos_page->id,
			 spt, spt->guest_page.type);

	spt->guest_page.write_cnt = 0;
	spt->guest_page.oos_page = NULL;
	oos_page->spt = NULL;

	list_del_init(&oos_page->vm_list);
	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);

	return 0;
}
static int attach_oos_page(struct intel_vgpu_oos_page *oos_page,
		struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	int ret;

	ret = intel_gvt_hypervisor_read_gpa(spt->vgpu,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			oos_page->mem, I915_GTT_PAGE_SIZE);
	if (ret)
		return ret;

	oos_page->spt = spt;
	spt->guest_page.oos_page = oos_page;

	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);

	trace_oos_change(spt->vgpu->id, "attach", oos_page->id,
			 spt, spt->guest_page.type);
	return 0;
}

static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
	int ret;

	ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn);
	if (ret)
		return ret;

	trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id,
			 spt, spt->guest_page.type);

	list_del_init(&oos_page->vm_list);
	return sync_oos_page(spt->vgpu, oos_page);
}

static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
	int ret;

	WARN(oos_page, "shadow PPGTT page already has an oos page\n");

	if (list_empty(&gtt->oos_page_free_list_head)) {
		oos_page = container_of(gtt->oos_page_use_list_head.next,
				struct intel_vgpu_oos_page, list);
		ret = ppgtt_set_guest_page_sync(oos_page->spt);
		if (ret)
			return ret;
		ret = detach_oos_page(spt->vgpu, oos_page);
		if (ret)
			return ret;
	} else
		oos_page = container_of(gtt->oos_page_free_list_head.next,
				struct intel_vgpu_oos_page, list);
	return attach_oos_page(oos_page, spt);
}

static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;

	if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n"))
		return -EINVAL;

	trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id,
			 spt, spt->guest_page.type);

	list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head);
	return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn);
}
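
/*
 * Out-of-sync (OOS) lifecycle: when a guest PTE page is written
 * repeatedly, write protection on it is dropped and a snapshot of its
 * contents is kept in oos_page->mem. Before a workload from that vGPU
 * is submitted, the page is re-protected and sync_oos_page() diffs the
 * snapshot against the current guest page, re-shadowing only the
 * entries that actually changed.
 */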

/**
 * intel_vgpu_sync_oos_pages - sync all the out-of-sync shadow pages for vGPU
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to host,
 * to sync all the out-of-sync shadow page tables for the vGPU.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_oos_page *oos_page;
	int ret;

	if (!enable_out_of_sync)
		return 0;

	list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
		oos_page = container_of(pos,
				struct intel_vgpu_oos_page, vm_list);
		ret = ppgtt_set_guest_page_sync(oos_page->spt);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * The heart of the PPGTT shadow page table.
 */
static int ppgtt_handle_guest_write_page_table(
		struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	int type = spt->shadow_page.type;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry old_se;
	int new_present;
	int ret;

	new_present = ops->test_present(we);

	/*
	 * Add the new entry first and then remove the old one, so that
	 * the ppgtt table stays valid during the window between the two
	 * operations.
	 */
	ppgtt_get_shadow_entry(spt, &old_se, index);

	if (new_present) {
		ret = ppgtt_handle_guest_entry_add(spt, we, index);
		if (ret)
			goto fail;
	}

	ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index);
	if (ret)
		goto fail;

	if (!new_present) {
		ops->set_pfn(&old_se, vgpu->gtt.scratch_pt[type].page_mfn);
		ppgtt_set_shadow_entry(spt, &old_se, index);
	}

	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
			spt, we->val64, we->type);
	return ret;
}
1292
Changbin Du72f03d72018-01-30 19:19:48 +08001293
1294
static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt)
{
	return enable_out_of_sync
		&& gtt_type_is_pte_pt(spt->guest_page.type)
		&& spt->guest_page.write_cnt >= 2;
}

static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
		unsigned long index)
{
	set_bit(index, spt->post_shadow_bitmap);
	if (!list_empty(&spt->post_shadow_list))
		return;

	list_add_tail(&spt->post_shadow_list,
			&spt->vgpu->gtt.post_shadow_list_head);
}

/**
 * intel_vgpu_flush_post_shadow - flush the post shadow transactions
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to the host,
 * to flush all the post shadows for a vGPU.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_ppgtt_spt *spt;
	struct intel_gvt_gtt_entry ge;
	unsigned long index;
	int ret;

	list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
		spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
				post_shadow_list);

		for_each_set_bit(index, spt->post_shadow_bitmap,
				GTT_ENTRY_NUM_IN_ONE_PAGE) {
			ppgtt_get_guest_entry(spt, &ge, index);

			ret = ppgtt_handle_guest_write_page_table(spt,
							&ge, index);
			if (ret)
				return ret;
			clear_bit(index, spt->post_shadow_bitmap);
		}
		list_del_init(&spt->post_shadow_list);
	}
	return 0;
}
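
/*
 * Illustrative sketch (hypothetical caller, not part of this file): a
 * submission path would typically resync out-of-sync pages and flush the
 * deferred post-shadow writes before a workload reaches hardware, roughly:
 *
 *	ret = intel_vgpu_sync_oos_pages(vgpu);
 *	if (ret)
 *		return ret;
 *	ret = intel_vgpu_flush_post_shadow(vgpu);
 *	if (ret)
 *		return ret;
 */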

static int ppgtt_handle_guest_write_page_table_bytes(
		struct intel_vgpu_ppgtt_spt *spt,
		u64 pa, void *p_data, int bytes)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	struct intel_gvt_gtt_entry we, se;
	unsigned long index;
	int ret;

	index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;

	ppgtt_get_guest_entry(spt, &we, index);

	ops->test_pse(&we);

	if (bytes == info->gtt_entry_size) {
		ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
		if (ret)
			return ret;
	} else {
		if (!test_bit(index, spt->post_shadow_bitmap)) {
			int type = spt->shadow_page.type;

			ppgtt_get_shadow_entry(spt, &se, index);
			ret = ppgtt_handle_guest_entry_removal(spt, &se, index);
			if (ret)
				return ret;
			ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
			ppgtt_set_shadow_entry(spt, &se, index);
		}
		ppgtt_set_post_shadow(spt, index);
	}

	if (!enable_out_of_sync)
		return 0;

	spt->guest_page.write_cnt++;

	if (spt->guest_page.oos_page)
		ops->set_entry(spt->guest_page.oos_page->mem, &we, index,
				false, 0, vgpu);

	if (can_do_out_of_sync(spt)) {
		if (!spt->guest_page.oos_page)
			ppgtt_allocate_oos_page(spt);

		ret = ppgtt_set_guest_page_oos(spt);
		if (ret < 0)
			return ret;
	}
	return 0;
}
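
/*
 * Worked example (assumed values, for illustration only): with an 8-byte
 * gtt_entry_size, a guest that updates one 64-bit PTE using two 4-byte MMIO
 * writes takes the "bytes != info->gtt_entry_size" branch above. On the
 * first partial write the shadow entry is parked on the scratch page table
 * and the index is queued with ppgtt_set_post_shadow(); the complete guest
 * entry is only re-read and shadowed when the post-shadow list is flushed:
 *
 *	write(pa + 0, lo32, 4);	 // partial: defer, shadow -> scratch
 *	write(pa + 4, hi32, 4);	 // partial: still deferred
 *	intel_vgpu_flush_post_shadow(vgpu);  // shadow the now-complete PTE
 */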

static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
	struct intel_gvt_gtt_entry se;
	int index;

	if (!mm->ppgtt_mm.shadowed)
		return;

	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
		ppgtt_get_shadow_root_entry(mm, &se, index);

		if (!ops->test_present(&se))
			continue;

		ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
		se.val64 = 0;
		ppgtt_set_shadow_root_entry(mm, &se, index);

		trace_spt_guest_change(vgpu->id, "destroy root pointer",
				       NULL, se.type, se.val64, index);
	}

	mm->ppgtt_mm.shadowed = false;
}
static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
	struct intel_vgpu_ppgtt_spt *spt;
	struct intel_gvt_gtt_entry ge, se;
	int index, ret;

	if (mm->ppgtt_mm.shadowed)
		return 0;

	mm->ppgtt_mm.shadowed = true;

	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
		ppgtt_get_guest_root_entry(mm, &ge, index);

		if (!ops->test_present(&ge))
			continue;

		trace_spt_guest_change(vgpu->id, __func__, NULL,
				       ge.type, ge.val64, index);

		spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
		if (IS_ERR(spt)) {
			gvt_vgpu_err("fail to populate guest root pointer\n");
			ret = PTR_ERR(spt);
			goto fail;
		}
		ppgtt_generate_shadow_entry(&se, spt, &ge);
		ppgtt_set_shadow_root_entry(mm, &se, index);

		trace_spt_guest_change(vgpu->id, "populate root pointer",
				       NULL, se.type, se.val64, index);
	}

	return 0;
fail:
	invalidate_ppgtt_mm(mm);
	return ret;
}

static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_mm *mm;

	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
	if (!mm)
		return NULL;

	mm->vgpu = vgpu;
	kref_init(&mm->ref);
	atomic_set(&mm->pincount, 0);

	return mm;
}

static void vgpu_free_mm(struct intel_vgpu_mm *mm)
{
	kfree(mm);
}

/**
 * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU
 * @vgpu: a vGPU
 * @root_entry_type: ppgtt root entry type
 * @pdps: guest pdps.
 *
 * This function is used to create a ppgtt mm object for a vGPU.
 *
 * Returns:
 * Pointer to the mm object on success, ERR_PTR() encoding a negative
 * error code if failed.
 */
struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
		intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_vgpu_mm *mm;
	int ret;

	mm = vgpu_alloc_mm(vgpu);
	if (!mm)
		return ERR_PTR(-ENOMEM);

	mm->type = INTEL_GVT_MM_PPGTT;

	GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY &&
		   root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY);
	mm->ppgtt_mm.root_entry_type = root_entry_type;

	INIT_LIST_HEAD(&mm->ppgtt_mm.list);
	INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);

	if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
		mm->ppgtt_mm.guest_pdps[0] = pdps[0];
	else
		memcpy(mm->ppgtt_mm.guest_pdps, pdps,
		       sizeof(mm->ppgtt_mm.guest_pdps));

	ret = shadow_ppgtt_mm(mm);
	if (ret) {
		gvt_vgpu_err("failed to shadow ppgtt mm\n");
		vgpu_free_mm(mm);
		return ERR_PTR(ret);
	}

	list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);
	list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
	return mm;
}
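
/*
 * Illustrative sketch (hypothetical caller; the pdps value and the array
 * size of 4 are assumptions): creating and later dropping a 4-level PPGTT
 * mm for a guest whose PML4 table lives at guest physical address 0x10000
 * might look roughly like:
 *
 *	u64 pdps[4] = { 0x10000 };
 *	struct intel_vgpu_mm *mm;
 *
 *	mm = intel_vgpu_create_ppgtt_mm(vgpu, GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
 *					pdps);
 *	if (IS_ERR(mm))
 *		return PTR_ERR(mm);
 *	...
 *	intel_vgpu_mm_put(mm);
 */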

static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_mm *mm;
	unsigned long nr_entries;

	mm = vgpu_alloc_mm(vgpu);
	if (!mm)
		return ERR_PTR(-ENOMEM);

	mm->type = INTEL_GVT_MM_GGTT;

	nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
	mm->ggtt_mm.virtual_ggtt = vzalloc(nr_entries *
					vgpu->gvt->device_info.gtt_entry_size);
	if (!mm->ggtt_mm.virtual_ggtt) {
		vgpu_free_mm(mm);
		return ERR_PTR(-ENOMEM);
	}

	return mm;
}

/**
 * _intel_vgpu_mm_release - destroy a mm object
 * @mm_ref: a kref object
 *
 * This function is used to destroy a mm object for a vGPU.
 *
 */
void _intel_vgpu_mm_release(struct kref *mm_ref)
{
	struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);

	if (GEM_WARN_ON(atomic_read(&mm->pincount)))
		gvt_err("vgpu mm pin count bug detected\n");

	if (mm->type == INTEL_GVT_MM_PPGTT) {
		list_del(&mm->ppgtt_mm.list);
		list_del(&mm->ppgtt_mm.lru_list);
		invalidate_ppgtt_mm(mm);
	} else {
		vfree(mm->ggtt_mm.virtual_ggtt);
	}

	vgpu_free_mm(mm);
}

/**
 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
 * @mm: a vGPU mm object
 *
 * This function is called when a user is done using a vGPU mm object.
 */
void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
{
	atomic_dec(&mm->pincount);
}

/**
 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
 * @mm: a vGPU mm object
 *
 * This function is called when a user wants to use a vGPU mm object. If this
 * mm object hasn't been shadowed yet, the shadow will be populated at this
 * time.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
{
	int ret;

	atomic_inc(&mm->pincount);

	if (mm->type == INTEL_GVT_MM_PPGTT) {
		ret = shadow_ppgtt_mm(mm);
		if (ret)
			return ret;

		list_move_tail(&mm->ppgtt_mm.lru_list,
			       &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);

	}

	return 0;
}
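
/*
 * Illustrative sketch: pin/unpin must stay balanced. A hypothetical user
 * that needs the shadow tables resident across a submission would do:
 *
 *	ret = intel_vgpu_pin_mm(mm);
 *	if (ret)
 *		return ret;
 *	// ... hardware may walk the shadow page tables here ...
 *	intel_vgpu_unpin_mm(mm);
 *
 * A pinned mm is skipped by reclaim_one_ppgtt_mm() below, so a leaked pin
 * keeps the shadow tables alive until the mm itself is destroyed.
 */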

static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
{
	struct intel_vgpu_mm *mm;
	struct list_head *pos, *n;

	list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);

		if (atomic_read(&mm->pincount))
			continue;

		list_del_init(&mm->ppgtt_mm.lru_list);
		invalidate_ppgtt_mm(mm);
		return 1;
	}
	return 0;
}

/*
 * GMA translation APIs.
 */
static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;

	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
	if (!s)
		return -ENXIO;

	if (!guest)
		ppgtt_get_shadow_entry(s, e, index);
	else
		ppgtt_get_guest_entry(s, e, index);
	return 0;
}

/**
 * intel_vgpu_gma_to_gpa - translate a gma to GPA
 * @mm: mm object. could be a PPGTT or GGTT mm object
 * @gma: graphics memory address in this mm object
 *
 * This function is used to translate a graphics memory address in a
 * specific graphics memory space to a guest physical address.
 *
 * Returns:
 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
 */
unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
	struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
	unsigned long gpa = INTEL_GVT_INVALID_ADDR;
	unsigned long gma_index[4];
	struct intel_gvt_gtt_entry e;
	int i, levels = 0;
	int ret;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT &&
		   mm->type != INTEL_GVT_MM_PPGTT);

	if (mm->type == INTEL_GVT_MM_GGTT) {
		if (!vgpu_gmadr_is_valid(vgpu, gma))
			goto err;

		ggtt_get_guest_entry(mm, &e,
			gma_ops->gma_to_ggtt_pte_index(gma));

		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
			+ (gma & ~I915_GTT_PAGE_MASK);

		trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
	} else {
		switch (mm->ppgtt_mm.root_entry_type) {
		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
			ppgtt_get_shadow_root_entry(mm, &e, 0);

			gma_index[0] = gma_ops->gma_to_pml4_index(gma);
			gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
			gma_index[2] = gma_ops->gma_to_pde_index(gma);
			gma_index[3] = gma_ops->gma_to_pte_index(gma);
			levels = 4;
			break;
		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
			ppgtt_get_shadow_root_entry(mm, &e,
					gma_ops->gma_to_l3_pdp_index(gma));

			gma_index[0] = gma_ops->gma_to_pde_index(gma);
			gma_index[1] = gma_ops->gma_to_pte_index(gma);
			levels = 2;
			break;
		default:
			GEM_BUG_ON(1);
		}

		/* walk the shadow page table and get gpa from guest entry */
		for (i = 0; i < levels; i++) {
			ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
				(i == levels - 1));
			if (ret)
				goto err;

			if (!pte_ops->test_present(&e)) {
				gvt_dbg_core("GMA 0x%lx is not present\n", gma);
				goto err;
			}
		}

		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) +
			(gma & ~I915_GTT_PAGE_MASK);
		trace_gma_translate(vgpu->id, "ppgtt", 0,
				    mm->ppgtt_mm.root_entry_type, gma, gpa);
	}

	return gpa;
err:
	gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
	return INTEL_GVT_INVALID_ADDR;
}
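
/*
 * Worked example (assuming the standard gen8 4-level layout with 9-bit
 * indices per level and 4KiB pages), showing how the walk above slices a
 * graphics memory address:
 *
 *	pml4 index = (gma >> 39) & 0x1ff
 *	pdp index  = (gma >> 30) & 0x1ff
 *	pde index  = (gma >> 21) & 0x1ff
 *	pte index  = (gma >> 12) & 0x1ff
 *	gpa        = (pfn(pte) << 12) | (gma & 0xfff)
 *
 * Only the last-level lookup passes guest == true, so the returned address
 * comes from the guest entry (a guest physical address), while the
 * intermediate levels are walked through the shadow tables.
 */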

static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
	unsigned int off, void *p_data, unsigned int bytes)
{
	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	unsigned long index = off >> info->gtt_entry_size_shift;
	struct intel_gvt_gtt_entry e;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	ggtt_get_guest_entry(ggtt_mm, &e, index);
	memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
			bytes);
	return 0;
}

/**
 * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read
 * @vgpu: a vGPU
 * @off: register offset
 * @p_data: data will be returned to guest
 * @bytes: data length
 *
 * This function is used to emulate the GTT MMIO register read
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	off -= info->gtt_start_offset;
	ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes);
	return ret;
}
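
/*
 * Worked example (illustrative numbers): with an 8-byte GGTT entry
 * (gtt_entry_size_shift == 3), a 4-byte read at gtt_start_offset + 0x14
 * yields off == 0x14, so index == 0x14 >> 3 == 2 and off & 7 == 4; the
 * upper 32 bits of guest GGTT entry 2 are copied back to the guest.
 */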

static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	struct intel_gvt *gvt = vgpu->gvt;
	const struct intel_gvt_device_info *info = &gvt->device_info;
	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
	unsigned long gma, gfn, mfn;
	struct intel_gvt_gtt_entry e, m;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	gma = g_gtt_index << I915_GTT_PAGE_SHIFT;

	/* the VM may configure the whole GM space when ballooning is used */
	if (!vgpu_gmadr_is_valid(vgpu, gma))
		return 0;

	ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index);

	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
			bytes);
	m = e;

	if (ops->test_present(&e)) {
		gfn = ops->get_pfn(&e);

		/* one PTE update may be issued in multiple writes and the
		 * first write may not construct a valid gfn
		 */
		if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) {
			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
			goto out;
		}

		mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn);
		if (mfn == INTEL_GVT_INVALID_ADDR) {
			gvt_vgpu_err("fail to populate guest ggtt entry\n");
			/* guest driver may read/write the entry when partially
			 * updating it; in this situation the p2m lookup fails,
			 * so set the shadow entry to point to a scratch page
			 */
			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
		} else
			ops->set_pfn(&m, mfn);
	} else
		ops->set_pfn(&m, gvt->gtt.scratch_mfn);

out:
	ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
	ggtt_invalidate(gvt->dev_priv);
	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
	return 0;
}

/**
 * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write
 * @vgpu: a vGPU
 * @off: register offset
 * @p_data: data from guest write
 * @bytes: data length
 *
 * This function is used to emulate the GTT MMIO register write
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
		unsigned int off, void *p_data, unsigned int bytes)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	off -= info->gtt_start_offset;
	ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
	return ret;
}

static int alloc_scratch_pages(struct intel_vgpu *vgpu,
		intel_gvt_gtt_type_t type)
{
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	int page_entry_num = I915_GTT_PAGE_SIZE >>
				vgpu->gvt->device_info.gtt_entry_size_shift;
	void *scratch_pt;
	int i;
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
		return -EINVAL;

	scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
	if (!scratch_pt) {
		gvt_vgpu_err("fail to allocate scratch page\n");
		return -ENOMEM;
	}

	daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0,
			4096, PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr)) {
		gvt_vgpu_err("fail to dmamap scratch_pt\n");
		__free_page(virt_to_page(scratch_pt));
		return -ENOMEM;
	}
	gtt->scratch_pt[type].page_mfn =
		(unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
	gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
			vgpu->id, type, gtt->scratch_pt[type].page_mfn);

	/* Build the tree by filling the scratch pt with entries that point
	 * to the next-level scratch pt or scratch page. scratch_pt[type]
	 * holds the scratch pt/scratch page used by page tables of type
	 * 'type'. E.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by
	 * GTT_TYPE_PPGTT_PDE_PT level page tables, which means this
	 * scratch_pt is itself of type GTT_TYPE_PPGTT_PTE_PT and is filled
	 * with the scratch page mfn.
	 */
	if (type > GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) {
		struct intel_gvt_gtt_entry se;

		memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
		se.type = get_entry_type(type - 1);
		ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);

		/* The entry parameters like present/writeable/cache type
		 * are set to the same values as i915's scratch page tree.
		 */
		se.val64 |= _PAGE_PRESENT | _PAGE_RW;
		if (type == GTT_TYPE_PPGTT_PDE_PT)
			se.val64 |= PPAT_CACHED;

		for (i = 0; i < page_entry_num; i++)
			ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
	}

	return 0;
}
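
/*
 * Illustrative layout of the resulting scratch hierarchy (one page per
 * page-table type; scratch_pt[X] is the page that an entry in an X-level
 * table points at, so its own level is X - 1):
 *
 *	scratch_pt[PML4_PT] entries --> scratch_pt[PDP_PT]
 *	scratch_pt[PDP_PT]  entries --> scratch_pt[PDE_PT]
 *	scratch_pt[PDE_PT]  entries --> scratch_pt[PTE_PT]
 *	scratch_pt[PTE_PT]  is a plain zeroed page (the scratch data page)
 *
 * Pointing a shadow entry at scratch_pt[type].page_mfn therefore gives the
 * guest a harmless, consistent subtree to walk.
 */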

static int release_scratch_page_tree(struct intel_vgpu *vgpu)
{
	int i;
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
		if (vgpu->gtt.scratch_pt[i].page != NULL) {
			daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
					I915_GTT_PAGE_SHIFT);
			dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
			__free_page(vgpu->gtt.scratch_pt[i].page);
			vgpu->gtt.scratch_pt[i].page = NULL;
			vgpu->gtt.scratch_pt[i].page_mfn = 0;
		}
	}

	return 0;
}

static int create_scratch_page_tree(struct intel_vgpu *vgpu)
{
	int i, ret;

	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
		ret = alloc_scratch_pages(vgpu, i);
		if (ret)
			goto err;
	}

	return 0;

err:
	release_scratch_page_tree(vgpu);
	return ret;
}

/**
 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to initialize per-vGPU graphics memory virtualization
 * components.
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;

	INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);

	INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
	INIT_LIST_HEAD(&gtt->oos_page_list_head);
	INIT_LIST_HEAD(&gtt->post_shadow_list_head);

	gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
	if (IS_ERR(gtt->ggtt_mm)) {
		gvt_vgpu_err("fail to create mm for ggtt.\n");
		return PTR_ERR(gtt->ggtt_mm);
	}

	intel_vgpu_reset_ggtt(vgpu);

	return create_scratch_page_tree(vgpu);
}

static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_mm *mm;

	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
		intel_vgpu_destroy_mm(mm);
	}

	if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
		gvt_err("vgpu ppgtt mm is not fully destroyed\n");

	if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
		gvt_err("Why do we still have spt not freed?\n");
		ppgtt_free_all_spt(vgpu);
	}
}

static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
{
	intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
	vgpu->gtt.ggtt_mm = NULL;
}

/**
 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to clean up per-vGPU graphics memory virtualization
 * components.
 */
void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
{
	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
	intel_vgpu_destroy_ggtt_mm(vgpu);
	release_scratch_page_tree(vgpu);
}

static void clean_spt_oos(struct intel_gvt *gvt)
{
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct list_head *pos, *n;
	struct intel_vgpu_oos_page *oos_page;

	WARN(!list_empty(&gtt->oos_page_use_list_head),
		"someone is still using oos page\n");

	list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
		oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
		list_del(&oos_page->list);
		kfree(oos_page);
	}
}

static int setup_spt_oos(struct intel_gvt *gvt)
{
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_oos_page *oos_page;
	int i;
	int ret;

	INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
	INIT_LIST_HEAD(&gtt->oos_page_use_list_head);

	for (i = 0; i < preallocated_oos_pages; i++) {
		oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
		if (!oos_page) {
			ret = -ENOMEM;
			goto fail;
		}

		INIT_LIST_HEAD(&oos_page->list);
		INIT_LIST_HEAD(&oos_page->vm_list);
		oos_page->id = i;
		list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
	}

	gvt_dbg_mm("%d oos pages preallocated\n", i);

	return 0;
fail:
	clean_spt_oos(gvt);
	return ret;
}

/**
 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
 * @vgpu: a vGPU
 * @pdps: guest pdps
 *
 * This function is used to find a PPGTT mm object from the mm object pool.
 *
 * Returns:
 * pointer to mm object on success, NULL if failed.
 */
struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
		u64 pdps[])
{
	struct intel_vgpu_mm *mm;
	struct list_head *pos;

	list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);

		switch (mm->ppgtt_mm.root_entry_type) {
		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
			if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
				return mm;
			break;
		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
			if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
				    sizeof(mm->ppgtt_mm.guest_pdps)))
				return mm;
			break;
		default:
			GEM_BUG_ON(1);
		}
	}
	return NULL;
}

/**
 * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object.
 * @vgpu: a vGPU
 * @root_entry_type: ppgtt root entry type
 * @pdps: guest pdps
 *
 * This function is used to find or create a PPGTT mm object from a guest.
 *
 * Returns:
 * Pointer to the mm object on success, ERR_PTR() encoding a negative
 * error code if failed.
 */
struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
		intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
{
	struct intel_vgpu_mm *mm;

	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
	if (mm) {
		intel_vgpu_mm_get(mm);
	} else {
		mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
		if (IS_ERR(mm))
			gvt_vgpu_err("fail to create mm\n");
	}
	return mm;
}
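
/*
 * Illustrative sketch (hypothetical caller): a PDP-root notification path
 * would pair the helper above with the put helper below, roughly:
 *
 *	mm = intel_vgpu_get_ppgtt_mm(vgpu, root_entry_type, pdps);
 *	if (IS_ERR(mm))
 *		return PTR_ERR(mm);
 *	// ... the guest uses this PPGTT ...
 *	intel_vgpu_put_ppgtt_mm(vgpu, pdps);
 */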

/**
 * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object.
 * @vgpu: a vGPU
 * @pdps: guest pdps
 *
 * This function is used to find a PPGTT mm object from a guest and drop
 * the reference held on it.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[])
{
	struct intel_vgpu_mm *mm;

	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
	if (!mm) {
		gvt_vgpu_err("fail to find ppgtt instance.\n");
		return -EINVAL;
	}
	intel_vgpu_mm_put(mm);
	return 0;
}

/**
 * intel_gvt_init_gtt - initialize mm components of a GVT device
 * @gvt: GVT device
 *
 * This function is called at the initialization stage, to initialize
 * the mm components of a GVT device.
 *
 * Returns:
 * zero on success, negative error code if failed.
 */
int intel_gvt_init_gtt(struct intel_gvt *gvt)
{
	int ret;
	void *page;
	struct device *dev = &gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	gvt_dbg_core("init gtt\n");

	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
		|| IS_KABYLAKE(gvt->dev_priv)) {
		gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
		gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
	} else {
		return -ENODEV;
	}

	page = (void *)get_zeroed_page(GFP_KERNEL);
	if (!page) {
		gvt_err("fail to allocate scratch ggtt page\n");
		return -ENOMEM;
	}

	daddr = dma_map_page(dev, virt_to_page(page), 0,
			4096, PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr)) {
		gvt_err("fail to dmamap scratch ggtt page\n");
		__free_page(virt_to_page(page));
		return -ENOMEM;
	}

	gvt->gtt.scratch_page = virt_to_page(page);
	gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);

	if (enable_out_of_sync) {
		ret = setup_spt_oos(gvt);
		if (ret) {
			gvt_err("fail to initialize SPT oos\n");
			dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
			__free_page(gvt->gtt.scratch_page);
			return ret;
		}
	}
	INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
	return 0;
}

/**
 * intel_gvt_clean_gtt - clean up mm components of a GVT device
 * @gvt: GVT device
 *
 * This function is called at the driver unloading stage, to clean up
 * the mm components of a GVT device.
 *
 */
void intel_gvt_clean_gtt(struct intel_gvt *gvt)
{
	struct device *dev = &gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn <<
					I915_GTT_PAGE_SHIFT);

	dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);

	__free_page(gvt->gtt.scratch_page);

	if (enable_out_of_sync)
		clean_spt_oos(gvt);
}

/**
 * intel_vgpu_reset_ggtt - reset the GGTT entries
 * @vgpu: a vGPU
 *
 * This function is called at the vGPU create stage
 * to reset all the GGTT entries.
 *
 */
void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct drm_i915_private *dev_priv = gvt->dev_priv;
	struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE};
	u32 index;
	u32 num_entries;

	pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn);
	pte_ops->set_present(&entry);

	index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
	num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
	while (num_entries--)
		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);

	index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
	num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
	while (num_entries--)
		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);

	ggtt_invalidate(dev_priv);
}

/**
 * intel_vgpu_reset_gtt - reset all GTT related status
 * @vgpu: a vGPU
 *
 * This function is called from the vfio core to reset all
 * GTT related status, including GGTT, PPGTT and scratch page.
 *
 */
void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
{
	/* Shadow pages are only created when there is no page
	 * table tracking data, so remove page tracking data after
	 * removing the shadow pages.
	 */
	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
	intel_vgpu_reset_ggtt(vgpu);
}