/*
 * GTT virtualization
 *
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhi Wang <zhi.a.wang@intel.com>
 *    Zhenyu Wang <zhenyuw@linux.intel.com>
 *    Xiao Zheng <xiao.zheng@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"
#include "i915_pvinfo.h"
#include "trace.h"

#if defined(VERBOSE_DEBUG)
#define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args)
#else
#define gvt_vdbg_mm(fmt, args...)
#endif

static bool enable_out_of_sync = false;
static int preallocated_oos_pages = 8192;

/*
 * Validate a gm address and its range size: the range must lie entirely
 * within the vGPU's aperture or hidden gm space.
 */
bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
{
	if ((!vgpu_gmadr_is_valid(vgpu, addr)) || (size
			&& !vgpu_gmadr_is_valid(vgpu, addr + size - 1))) {
		gvt_vgpu_err("invalid range gmadr 0x%llx size 0x%x\n",
				addr, size);
		return false;
	}
	return true;
}

/* translate a guest gmadr to host gmadr */
int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr)
{
	if (WARN(!vgpu_gmadr_is_valid(vgpu, g_addr),
		 "invalid guest gmadr %llx\n", g_addr))
		return -EACCES;

	if (vgpu_gmadr_is_aperture(vgpu, g_addr))
		*h_addr = vgpu_aperture_gmadr_base(vgpu)
			  + (g_addr - vgpu_aperture_offset(vgpu));
	else
		*h_addr = vgpu_hidden_gmadr_base(vgpu)
			  + (g_addr - vgpu_hidden_offset(vgpu));
	return 0;
}

/* translate a host gmadr to guest gmadr */
int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr)
{
	if (WARN(!gvt_gmadr_is_valid(vgpu->gvt, h_addr),
		 "invalid host gmadr %llx\n", h_addr))
		return -EACCES;

	if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr))
		*g_addr = vgpu_aperture_gmadr_base(vgpu)
			  + (h_addr - gvt_aperture_gmadr_base(vgpu->gvt));
	else
		*g_addr = vgpu_hidden_gmadr_base(vgpu)
			  + (h_addr - gvt_hidden_gmadr_base(vgpu->gvt));
	return 0;
}
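
/*
 * For example (illustrative numbers only): if the vGPU's aperture
 * partition starts at host gmadr 0x10000000 while the guest sees it at
 * guest gmadr 0x0, then g2h(0x2000) yields 0x10002000 and
 * h2g(0x10002000) yields 0x2000. The hidden (high) gm range is rebased
 * the same way using its own base/offset pair.
 */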

int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
			     unsigned long *h_index)
{
	u64 h_addr;
	int ret;

	ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << I915_GTT_PAGE_SHIFT,
				       &h_addr);
	if (ret)
		return ret;

	*h_index = h_addr >> I915_GTT_PAGE_SHIFT;
	return 0;
}

int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
			     unsigned long *g_index)
{
	u64 g_addr;
	int ret;

	ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << I915_GTT_PAGE_SHIFT,
				       &g_addr);
	if (ret)
		return ret;

	*g_index = g_addr >> I915_GTT_PAGE_SHIFT;
	return 0;
}

#define gtt_type_is_entry(type) \
	(type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
	 && type != GTT_TYPE_PPGTT_PTE_ENTRY \
	 && type != GTT_TYPE_PPGTT_ROOT_ENTRY)

#define gtt_type_is_pt(type) \
	(type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)

#define gtt_type_is_pte_pt(type) \
	(type == GTT_TYPE_PPGTT_PTE_PT)

#define gtt_type_is_root_pointer(type) \
	(gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)

#define gtt_init_entry(e, t, p, v) do { \
	(e)->type = t; \
	(e)->pdev = p; \
	memcpy(&(e)->val64, &v, sizeof(v)); \
} while (0)

/*
 * Mappings between GTT_TYPE* enumerations.
 * For a given type, the following information can be looked up:
 * - the type of the next-level page table
 * - the type of an entry inside this level of page table
 * - the type of the entry when its PSE bit is set
 *
 * If the given type doesn't carry a certain piece of information,
 * GTT_TYPE_INVALID is returned: e.g. an L4 root entry has no PSE bit,
 * so asking for its PSE type returns GTT_TYPE_INVALID, and a PTE page
 * table has no next-level page table, so asking for its next-level
 * type also returns GTT_TYPE_INVALID. This is useful when traversing
 * a page table.
 */

struct gtt_type_table_entry {
	int entry_type;
	int pt_type;
	int next_pt_type;
	int pse_entry_type;
};

#define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \
	[type] = { \
		.entry_type = e_type, \
		.pt_type = cpt_type, \
		.next_pt_type = npt_type, \
		.pse_entry_type = pse_type, \
	}

static struct gtt_type_table_entry gtt_type_table[] = {
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
			GTT_TYPE_GGTT_PTE,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
};
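
/*
 * A worked lookup, as a rough illustration: starting from a PML4 table
 * (GTT_TYPE_PPGTT_PML4_PT), get_entry_type() gives
 * GTT_TYPE_PPGTT_PML4_ENTRY for its entries and get_next_pt_type()
 * gives GTT_TYPE_PPGTT_PDP_PT for the table such an entry points to;
 * get_pse_type() on a PDE entry gives GTT_TYPE_PPGTT_PTE_2M_ENTRY,
 * while types with no PSE meaning map to GTT_TYPE_INVALID.
 */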

static inline int get_next_pt_type(int type)
{
	return gtt_type_table[type].next_pt_type;
}

static inline int get_pt_type(int type)
{
	return gtt_type_table[type].pt_type;
}

static inline int get_entry_type(int type)
{
	return gtt_type_table[type].entry_type;
}

static inline int get_pse_type(int type)
{
	return gtt_type_table[type].pse_entry_type;
}

static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index)
{
	void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;

	return readq(addr);
}

static void ggtt_invalidate(struct drm_i915_private *dev_priv)
{
	mmio_hw_access_pre(dev_priv);
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	mmio_hw_access_post(dev_priv);
}

static void write_pte64(struct drm_i915_private *dev_priv,
		unsigned long index, u64 pte)
{
	void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;

	writeq(pte, addr);
}
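
/*
 * Roughly, GGTT PTEs live in the GSM and are reached through the
 * ggtt.gsm io mapping: on gen8+ each entry is 64 bits, so entry i sits
 * at gsm + i * 8 and read_pte64()/write_pte64() are plain 64-bit MMIO
 * accesses at that offset, with ggtt_invalidate() flushing the TLB
 * afterwards. The gtt_{get,set}_entry64() helpers below then pick one
 * of three backing stores: guest memory via the hypervisor when
 * hypervisor_access is set, the GGTT via MMIO when pt is NULL, or a
 * shadow page's kernel mapping otherwise.
 */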

static inline int gtt_get_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (WARN_ON(info->gtt_entry_size != 8))
		return -EINVAL;

	if (hypervisor_access) {
		ret = intel_gvt_hypervisor_read_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
		if (WARN_ON(ret))
			return ret;
	} else if (!pt) {
		e->val64 = read_pte64(vgpu->gvt->dev_priv, index);
	} else {
		e->val64 = *((u64 *)pt + index);
	}
	return 0;
}

static inline int gtt_set_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (WARN_ON(info->gtt_entry_size != 8))
		return -EINVAL;

	if (hypervisor_access) {
		ret = intel_gvt_hypervisor_write_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
		if (WARN_ON(ret))
			return ret;
	} else if (!pt) {
		write_pte64(vgpu->gvt->dev_priv, index, e->val64);
	} else {
		*((u64 *)pt + index) = e->val64;
	}
	return 0;
}

#define GTT_HAW 46

#define ADDR_1G_MASK (((1UL << (GTT_HAW - 30)) - 1) << 30)
#define ADDR_2M_MASK (((1UL << (GTT_HAW - 21)) - 1) << 21)
#define ADDR_4K_MASK (((1UL << (GTT_HAW - 12)) - 1) << 12)
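
/*
 * A quick sanity check on the masks, assuming GTT_HAW = 46:
 * ADDR_4K_MASK keeps address bits [45:12], ADDR_2M_MASK bits [45:21]
 * and ADDR_1G_MASK bits [45:30]; e.g. (1UL << (46 - 12)) - 1 is a run
 * of 34 one-bits, shifted left by 12 to cover the 4K-aligned physical
 * address field of a PTE.
 */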

static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
{
	unsigned long pfn;

	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
		pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT;
	else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
		pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT;
	else
		pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT;
	return pfn;
}

static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
{
	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
		e->val64 &= ~ADDR_1G_MASK;
		pfn &= (ADDR_1G_MASK >> PAGE_SHIFT);
	} else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
		e->val64 &= ~ADDR_2M_MASK;
		pfn &= (ADDR_2M_MASK >> PAGE_SHIFT);
	} else {
		e->val64 &= ~ADDR_4K_MASK;
		pfn &= (ADDR_4K_MASK >> PAGE_SHIFT);
	}

	e->val64 |= (pfn << PAGE_SHIFT);
}

static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
{
	/* Entry doesn't have PSE bit. */
	if (get_pse_type(e->type) == GTT_TYPE_INVALID)
		return false;

	e->type = get_entry_type(e->type);
	if (!(e->val64 & _PAGE_PSE))
		return false;

	e->type = get_pse_type(e->type);
	return true;
}

static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
{
	/*
	 * i915 writes PDP root pointer registers without the present bit
	 * set, and that still works, so root pointer entries need to be
	 * treated specially.
	 */
	if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
			|| e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
		return (e->val64 != 0);
	else
		return (e->val64 & _PAGE_PRESENT);
}

static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
{
	e->val64 &= ~_PAGE_PRESENT;
}

static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
{
	e->val64 |= _PAGE_PRESENT;
}

/*
 * Per-platform GMA routines.
 */
static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
{
	unsigned long x = (gma >> I915_GTT_PAGE_SHIFT);

	trace_gma_index(__func__, gma, x);
	return x;
}

#define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
{ \
	unsigned long x = (exp); \
	trace_gma_index(__func__, gma, x); \
	return x; \
}

DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
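
/*
 * For a gen8 48-bit graphics memory address, the index helpers above
 * just slice bit fields out of the GMA: pml4 index = gma[47:39],
 * l4 pdp index = gma[38:30], pde index = gma[29:21] and pte index =
 * gma[20:12], each 9 bits wide (512 entries per table); the legacy
 * 32-bit mode uses only a 2-bit l3 pdp index at gma[31:30].
 */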

static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
	.get_entry = gtt_get_entry64,
	.set_entry = gtt_set_entry64,
	.clear_present = gtt_entry_clear_present,
	.set_present = gtt_entry_set_present,
	.test_present = gen8_gtt_test_present,
	.test_pse = gen8_gtt_test_pse,
	.get_pfn = gen8_gtt_get_pfn,
	.set_pfn = gen8_gtt_set_pfn,
};

static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
	.gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
	.gma_to_pte_index = gen8_gma_to_pte_index,
	.gma_to_pde_index = gen8_gma_to_pde_index,
	.gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
	.gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
	.gma_to_pml4_index = gen8_gma_to_pml4_index,
};

/*
 * MM helpers.
 */
static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index,
		bool guest)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT);

	entry->type = mm->ppgtt_mm.root_entry_type;
	pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps :
			   mm->ppgtt_mm.shadow_pdps,
			   entry, index, false, 0, mm->vgpu);

	pte_ops->test_pse(entry);
}

static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_get_root_entry(mm, entry, index, true);
}

static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_get_root_entry(mm, entry, index, false);
}

static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index,
		bool guest)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps :
			   mm->ppgtt_mm.shadow_pdps,
			   entry, index, false, 0, mm->vgpu);
}

static inline void ppgtt_set_guest_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_set_root_entry(mm, entry, index, true);
}

static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_set_root_entry(mm, entry, index, false);
}

static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	entry->type = GTT_TYPE_GGTT_PTE;
	pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
			   false, 0, mm->vgpu);
}

static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
			   false, 0, mm->vgpu);
}

static void ggtt_set_host_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu);
}

/*
 * PPGTT shadow page table helpers.
 */
static inline int ppgtt_spt_get_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	int ret;

	e->type = get_entry_type(type);

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return -EINVAL;

	ret = ops->get_entry(page_table, e, index, guest,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			spt->vgpu);
	if (ret)
		return ret;

	ops->test_pse(e);

	gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
		    type, e->type, index, e->val64);
	return 0;
}

static inline int ppgtt_spt_set_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return -EINVAL;

	gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
		    type, e->type, index, e->val64);

	return ops->set_entry(page_table, e, index, guest,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			spt->vgpu);
}

#define ppgtt_get_guest_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, NULL, \
		spt->guest_page.type, e, index, true)

#define ppgtt_set_guest_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, NULL, \
		spt->guest_page.type, e, index, true)

#define ppgtt_get_shadow_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)

#define ppgtt_set_shadow_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)
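
/*
 * As a usage sketch: the guest-entry macros pass a NULL page table, so
 * ppgtt_spt_get_entry() falls through to a hypervisor read of the
 * guest page at guest_page.gfn, while the shadow-entry macros access
 * the shadow page through its kernel mapping, e.g.
 *
 *	struct intel_gvt_gtt_entry e;
 *
 *	ppgtt_get_guest_entry(spt, &e, i);	// guest view, via gpa
 *	ppgtt_get_shadow_entry(spt, &e, i);	// shadow view, via vaddr
 */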

static void *alloc_spt(gfp_t gfp_mask)
{
	struct intel_vgpu_ppgtt_spt *spt;

	spt = kzalloc(sizeof(*spt), gfp_mask);
	if (!spt)
		return NULL;

	spt->shadow_page.page = alloc_page(gfp_mask);
	if (!spt->shadow_page.page) {
		kfree(spt);
		return NULL;
	}
	return spt;
}

static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	__free_page(spt->shadow_page.page);
	kfree(spt);
}

static int detach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page);

static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct device *kdev = &spt->vgpu->gvt->dev_priv->drm.pdev->dev;

	trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type);

	dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096,
		       PCI_DMA_BIDIRECTIONAL);
	if (!hlist_unhashed(&spt->node))
		hash_del(&spt->node);

	if (spt->guest_page.oos_page)
		detach_oos_page(spt->vgpu, spt->guest_page.oos_page);

	intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);

	list_del_init(&spt->post_shadow_list);
	free_spt(spt);
}

static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
{
	struct hlist_node *n;
	struct intel_vgpu_ppgtt_spt *spt;
	int i;

	hash_for_each_safe(vgpu->gtt.spt_hash_table, i, n, spt, node)
		ppgtt_free_spt(spt);
}

static int ppgtt_handle_guest_write_page_table_bytes(
		struct intel_vgpu_ppgtt_spt *spt,
		u64 pa, void *p_data, int bytes);

static int ppgtt_write_protection_handler(
		struct intel_vgpu_page_track *page_track,
		u64 gpa, void *data, int bytes)
{
	struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	return ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data,
							 bytes);
}

/* Find a spt by guest gfn. */
static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn(
		struct intel_vgpu *vgpu, unsigned long gfn)
{
	struct intel_vgpu_page_track *track;

	track = intel_vgpu_find_page_track(vgpu, gfn);
	if (track && track->handler == ppgtt_write_protection_handler)
		return track->priv_data;

	return NULL;
}

/* Find the spt by shadow page mfn. */
static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
		struct intel_vgpu *vgpu, unsigned long mfn)
{
	struct intel_vgpu_ppgtt_spt *spt;

	hash_for_each_possible(vgpu->gtt.spt_hash_table, spt, node, mfn) {
		if (spt->shadow_page.mfn == mfn)
			return spt;
	}
	return NULL;
}

static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);

static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
		struct intel_vgpu *vgpu, int type, unsigned long gfn)
{
	struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	dma_addr_t daddr;
	int ret;

retry:
	spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
	if (!spt) {
		if (reclaim_one_ppgtt_mm(vgpu->gvt))
			goto retry;

		gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
		return ERR_PTR(-ENOMEM);
	}

	spt->vgpu = vgpu;
	atomic_set(&spt->refcount, 1);
	INIT_LIST_HEAD(&spt->post_shadow_list);

	/*
	 * Init shadow_page.
	 */
	spt->shadow_page.type = type;
	daddr = dma_map_page(kdev, spt->shadow_page.page,
			     0, 4096, PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(kdev, daddr)) {
		gvt_vgpu_err("fail to map dma addr\n");
		free_spt(spt);
		return ERR_PTR(-EINVAL);
	}
	spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
	spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;

	/*
	 * Init guest_page.
	 */
	spt->guest_page.type = type;
	spt->guest_page.gfn = gfn;

	ret = intel_vgpu_register_page_track(vgpu, spt->guest_page.gfn,
					     ppgtt_write_protection_handler,
					     spt);
	if (ret) {
		free_spt(spt);
		dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
		return ERR_PTR(ret);
	}

	INIT_HLIST_NODE(&spt->node);
	hash_add(vgpu->gtt.spt_hash_table, &spt->node, spt->shadow_page.mfn);

	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
	return spt;
}
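
/*
 * In short, each spt pairs a write-protected guest page table page
 * (guest_page.gfn, tracked through the page-track interface) with a
 * DMA-mapped shadow page (shadow_page.mfn) that the hardware actually
 * walks, and the spt is hashed by that shadow mfn for reverse lookup.
 */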

#define pt_entry_size_shift(spt) \
	((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)

#define pt_entries(spt) \
	(I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))

#define for_each_present_guest_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); i++) \
		if (!ppgtt_get_guest_entry(spt, e, i) && \
		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))

#define for_each_present_shadow_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); i++) \
		if (!ppgtt_get_shadow_entry(spt, e, i) && \
		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))
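
/*
 * Note: with 4K pages and 8-byte entries, pt_entries() is 512, and the
 * iterators above visit only slots whose read succeeds and whose
 * present bit is set, so loop bodies never see empty entries.
 */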

static void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	int v = atomic_read(&spt->refcount);

	trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));

	atomic_inc(&spt->refcount);
}

static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);

static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
		struct intel_gvt_gtt_entry *e)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;
	intel_gvt_gtt_type_t cur_pt_type;

	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));

	if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
		&& e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
		cur_pt_type = get_next_pt_type(e->type) + 1;
		if (ops->get_pfn(e) ==
			vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
			return 0;
	}
	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
	if (!s) {
		gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
				ops->get_pfn(e));
		return -ENXIO;
	}
	return ppgtt_invalidate_spt(s);
}

static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry e;
	unsigned long index;
	int ret;
	int v = atomic_read(&spt->refcount);

	trace_spt_change(spt->vgpu->id, "die", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);

	trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));

	if (atomic_dec_return(&spt->refcount) > 0)
		return 0;

	if (gtt_type_is_pte_pt(spt->shadow_page.type))
		goto release;

	for_each_present_shadow_entry(spt, &e, index) {
		switch (e.type) {
		case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
			gvt_vdbg_mm("invalidate 4K entry\n");
			continue;
		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
			WARN(1, "GVT doesn't support 2M/1GB page\n");
			continue;
		case GTT_TYPE_PPGTT_PML4_ENTRY:
		case GTT_TYPE_PPGTT_PDP_ENTRY:
		case GTT_TYPE_PPGTT_PDE_ENTRY:
			gvt_vdbg_mm("invalidate PML4/PDP/PDE entry\n");
			ret = ppgtt_invalidate_spt_by_shadow_entry(
					spt->vgpu, &e);
			if (ret)
				goto fail;
			break;
		default:
			GEM_BUG_ON(1);
		}
	}
release:
	trace_spt_change(spt->vgpu->id, "release", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);
	ppgtt_free_spt(spt);
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
			spt, e.val64, e.type);
	return ret;
}

static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt);

static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
		struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	int ret;

	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));

	spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
	if (spt)
		ppgtt_get_spt(spt);
	else {
		int type = get_next_pt_type(we->type);

		spt = ppgtt_alloc_spt(vgpu, type, ops->get_pfn(we));
		if (IS_ERR(spt)) {
			ret = PTR_ERR(spt);
			goto fail;
		}

		ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
		if (ret)
			goto fail;

		ret = ppgtt_populate_spt(spt);
		if (ret)
			goto fail;

		trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
				 spt->shadow_page.type);
	}
	return spt;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
		     spt, we->val64, we->type);
	return ERR_PTR(ret);
}
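
/*
 * Put differently: shadowing a guest entry either finds an existing
 * spt for the guest page table it points to (and just takes a
 * reference), or allocates one, write-protects the guest page and
 * recursively populates it, so a whole guest radix tree is shadowed
 * on first use, level by level.
 */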

static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
		struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
{
	struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;

	se->type = ge->type;
	se->val64 = ge->val64;

	ops->set_pfn(se, s->shadow_page.mfn);
}

static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
	struct intel_gvt_gtt_entry *ge)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry se = *ge;
	unsigned long gfn, mfn;

	if (!pte_ops->test_present(ge))
		return 0;

	gfn = pte_ops->get_pfn(ge);

	switch (ge->type) {
	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
		gvt_vdbg_mm("shadow 4K gtt entry\n");
		break;
	case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
	case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
		gvt_vgpu_err("GVT doesn't support 2M/1GB entry\n");
		return -EINVAL;
	default:
		GEM_BUG_ON(1);
	}

	/* direct shadow */
	mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn);
	if (mfn == INTEL_GVT_INVALID_ADDR)
		return -ENXIO;

	pte_ops->set_pfn(&se, mfn);
	ppgtt_set_shadow_entry(spt, &se, index);
	return 0;
}

static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;
	struct intel_gvt_gtt_entry se, ge;
	unsigned long gfn, i;
	int ret;

	trace_spt_change(spt->vgpu->id, "born", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);

	for_each_present_guest_entry(spt, &ge, i) {
		if (gtt_type_is_pt(get_next_pt_type(ge.type))) {
			s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
			if (IS_ERR(s)) {
				ret = PTR_ERR(s);
				goto fail;
			}
			ppgtt_get_shadow_entry(spt, &se, i);
			ppgtt_generate_shadow_entry(&se, s, &ge);
			ppgtt_set_shadow_entry(spt, &se, i);
		} else {
			gfn = ops->get_pfn(&ge);
			if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) {
				ops->set_pfn(&se, gvt->gtt.scratch_mfn);
				ppgtt_set_shadow_entry(spt, &se, i);
				continue;
			}

			ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
			if (ret)
				goto fail;
		}
	}
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
			spt, ge.val64, ge.type);
	return ret;
}

static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *se, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	int ret;

	trace_spt_guest_change(spt->vgpu->id, "remove", spt,
			       spt->shadow_page.type, se->val64, index);

	gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n",
		    se->type, index, se->val64);

	if (!ops->test_present(se))
		return 0;

	if (ops->get_pfn(se) ==
	    vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn)
		return 0;

	if (gtt_type_is_pt(get_next_pt_type(se->type))) {
		struct intel_vgpu_ppgtt_spt *s =
			intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se));
		if (!s) {
			gvt_vgpu_err("fail to find guest page\n");
			ret = -ENXIO;
			goto fail;
		}
		ret = ppgtt_invalidate_spt(s);
		if (ret)
			goto fail;
	}
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
			spt, se->val64, se->type);
	return ret;
}

static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry m;
	struct intel_vgpu_ppgtt_spt *s;
	int ret;

	trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type,
			       we->val64, index);

	gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n",
		    we->type, index, we->val64);

	if (gtt_type_is_pt(get_next_pt_type(we->type))) {
		s = ppgtt_populate_spt_by_guest_entry(vgpu, we);
		if (IS_ERR(s)) {
			ret = PTR_ERR(s);
			goto fail;
		}
		ppgtt_get_shadow_entry(spt, &m, index);
		ppgtt_generate_shadow_entry(&m, s, we);
		ppgtt_set_shadow_entry(spt, &m, index);
	} else {
		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we);
		if (ret)
			goto fail;
	}
	return 0;
fail:
	gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
		spt, we->val64, we->type);
	return ret;
}

static int sync_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
	struct intel_gvt_gtt_entry old, new;
	int index;
	int ret;

	trace_oos_change(vgpu->id, "sync", oos_page->id,
			 spt, spt->guest_page.type);

	old.type = new.type = get_entry_type(spt->guest_page.type);
	old.val64 = new.val64 = 0;

	for (index = 0; index < (I915_GTT_PAGE_SIZE >>
				info->gtt_entry_size_shift); index++) {
		ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
		ops->get_entry(NULL, &new, index, true,
			       spt->guest_page.gfn << PAGE_SHIFT, vgpu);

		if (old.val64 == new.val64
			&& !test_and_clear_bit(index, spt->post_shadow_bitmap))
			continue;

		trace_oos_sync(vgpu->id, oos_page->id,
				spt, spt->guest_page.type,
				new.val64, index);

		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new);
		if (ret)
			return ret;

		ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
	}

	spt->guest_page.write_cnt = 0;
	list_del_init(&spt->post_shadow_list);
	return 0;
}

static int detach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;

	trace_oos_change(vgpu->id, "detach", oos_page->id,
			 spt, spt->guest_page.type);

	spt->guest_page.write_cnt = 0;
	spt->guest_page.oos_page = NULL;
	oos_page->spt = NULL;

	list_del_init(&oos_page->vm_list);
	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);

	return 0;
}

static int attach_oos_page(struct intel_vgpu_oos_page *oos_page,
		struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	int ret;

	ret = intel_gvt_hypervisor_read_gpa(spt->vgpu,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			oos_page->mem, I915_GTT_PAGE_SIZE);
	if (ret)
		return ret;

	oos_page->spt = spt;
	spt->guest_page.oos_page = oos_page;

	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);

	trace_oos_change(spt->vgpu->id, "attach", oos_page->id,
			 spt, spt->guest_page.type);
	return 0;
}

static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
	int ret;

	ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn);
	if (ret)
		return ret;

	trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id,
			 spt, spt->guest_page.type);

	list_del_init(&oos_page->vm_list);
	return sync_oos_page(spt->vgpu, oos_page);
}

static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
	int ret;

	WARN(oos_page, "shadow PPGTT page already has an oos page\n");

	if (list_empty(&gtt->oos_page_free_list_head)) {
		oos_page = container_of(gtt->oos_page_use_list_head.next,
				struct intel_vgpu_oos_page, list);
		ret = ppgtt_set_guest_page_sync(oos_page->spt);
		if (ret)
			return ret;
		ret = detach_oos_page(spt->vgpu, oos_page);
		if (ret)
			return ret;
	} else
		oos_page = container_of(gtt->oos_page_free_list_head.next,
				struct intel_vgpu_oos_page, list);
	return attach_oos_page(oos_page, spt);
}

static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;

	if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n"))
		return -EINVAL;

	trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id,
			 spt, spt->guest_page.type);

	list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head);
	return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn);
}
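
/*
 * Out-of-sync (oos) in a nutshell: a PTE-level guest page that is
 * written frequently can have its write protection dropped; a snapshot
 * of the guest page is kept in oos_page->mem, and before the next
 * workload is submitted the page is re-protected and any entries that
 * changed relative to the snapshot are re-shadowed (sync_oos_page()).
 * This trades one page-table walk at submit time for far fewer write
 * faults while the guest builds up its page tables.
 */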

/**
 * intel_vgpu_sync_oos_pages - sync all the out-of-synced shadow pages for vGPU
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to the host,
 * to sync all the out-of-synced shadow pages of the vGPU.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_oos_page *oos_page;
	int ret;

	if (!enable_out_of_sync)
		return 0;

	list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
		oos_page = container_of(pos,
				struct intel_vgpu_oos_page, vm_list);
		ret = ppgtt_set_guest_page_sync(oos_page->spt);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * The heart of the PPGTT shadow page table code.
 */
static int ppgtt_handle_guest_write_page_table(
		struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	int type = spt->shadow_page.type;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry old_se;
	int new_present;
	int ret;

	new_present = ops->test_present(we);

	/*
	 * Add the new entry first and then remove the old one, which
	 * guarantees the ppgtt table stays valid during the window
	 * between the addition and the removal.
	 */
	ppgtt_get_shadow_entry(spt, &old_se, index);

	if (new_present) {
		ret = ppgtt_handle_guest_entry_add(spt, we, index);
		if (ret)
			goto fail;
	}

	ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index);
	if (ret)
		goto fail;

	if (!new_present) {
		ops->set_pfn(&old_se, vgpu->gtt.scratch_pt[type].page_mfn);
		ppgtt_set_shadow_entry(spt, &old_se, index);
	}

	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
			spt, we->val64, we->type);
	return ret;
}

static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt)
{
	return enable_out_of_sync
		&& gtt_type_is_pte_pt(spt->guest_page.type)
		&& spt->guest_page.write_cnt >= 2;
}

static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
		unsigned long index)
{
	set_bit(index, spt->post_shadow_bitmap);
	if (!list_empty(&spt->post_shadow_list))
		return;

	list_add_tail(&spt->post_shadow_list,
		      &spt->vgpu->gtt.post_shadow_list_head);
}
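
/*
 * Post shadowing defers re-shadowing of partially written entries: a
 * write narrower than one PTE only marks the slot in
 * post_shadow_bitmap and queues the spt on post_shadow_list_head;
 * intel_vgpu_flush_post_shadow() below replays the marked slots from
 * the guest view before the next workload runs.
 */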
1310
1311/**
1312 * intel_vgpu_flush_post_shadow - flush the post shadow transactions
1313 * @vgpu: a vGPU
1314 *
 1315 * This function is called before submitting a guest workload to the host,
1316 * to flush all the post shadows for a vGPU.
1317 *
1318 * Returns:
1319 * Zero on success, negative error code if failed.
1320 */
1321int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
1322{
1323 struct list_head *pos, *n;
1324 struct intel_vgpu_ppgtt_spt *spt;
Bing Niu9baf0922016-11-07 10:44:36 +08001325 struct intel_gvt_gtt_entry ge;
Zhi Wang2707e442016-03-28 23:23:16 +08001326 unsigned long index;
1327 int ret;
1328
1329 list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
1330 spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
1331 post_shadow_list);
1332
1333 for_each_set_bit(index, spt->post_shadow_bitmap,
1334 GTT_ENTRY_NUM_IN_ONE_PAGE) {
1335 ppgtt_get_guest_entry(spt, &ge, index);
Zhi Wang2707e442016-03-28 23:23:16 +08001336
Changbin Du44b46732018-01-30 19:19:49 +08001337 ret = ppgtt_handle_guest_write_page_table(spt,
1338 &ge, index);
Zhi Wang2707e442016-03-28 23:23:16 +08001339 if (ret)
1340 return ret;
1341 clear_bit(index, spt->post_shadow_bitmap);
1342 }
1343 list_del_init(&spt->post_shadow_list);
1344 }
1345 return 0;
1346}
1347
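/*
 * Dispatch one guest PTE write. A write covering a whole entry is
 * shadowed immediately; a partial write only invalidates the old
 * shadow entry and records the index for later replay by
 * intel_vgpu_flush_post_shadow(); frequently written PTE pages may
 * additionally be switched out of sync below.
 */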
Zhi Wang7d1e5cd2017-09-29 02:47:55 +08001348static int ppgtt_handle_guest_write_page_table_bytes(
Changbin Du44b46732018-01-30 19:19:49 +08001349 struct intel_vgpu_ppgtt_spt *spt,
Zhi Wang2707e442016-03-28 23:23:16 +08001350 u64 pa, void *p_data, int bytes)
1351{
Zhi Wang2707e442016-03-28 23:23:16 +08001352 struct intel_vgpu *vgpu = spt->vgpu;
1353 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1354 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
Tina Zhang6b3816d2017-08-14 15:24:14 +08001355 struct intel_gvt_gtt_entry we, se;
Zhi Wang2707e442016-03-28 23:23:16 +08001356 unsigned long index;
1357 int ret;
1358
1359 index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;
1360
1361 ppgtt_get_guest_entry(spt, &we, index);
Zhi Wang2707e442016-03-28 23:23:16 +08001362
1363 ops->test_pse(&we);
1364
1365 if (bytes == info->gtt_entry_size) {
Changbin Du44b46732018-01-30 19:19:49 +08001366 ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
Zhi Wang2707e442016-03-28 23:23:16 +08001367 if (ret)
1368 return ret;
1369 } else {
Zhi Wang2707e442016-03-28 23:23:16 +08001370 if (!test_bit(index, spt->post_shadow_bitmap)) {
Zhi Wang121d760d2017-12-29 02:50:08 +08001371 int type = spt->shadow_page.type;
1372
Tina Zhang6b3816d2017-08-14 15:24:14 +08001373 ppgtt_get_shadow_entry(spt, &se, index);
Changbin Du44b46732018-01-30 19:19:49 +08001374 ret = ppgtt_handle_guest_entry_removal(spt, &se, index);
Zhi Wang2707e442016-03-28 23:23:16 +08001375 if (ret)
1376 return ret;
Zhi Wang121d760d2017-12-29 02:50:08 +08001377 ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
1378 ppgtt_set_shadow_entry(spt, &se, index);
Zhi Wang2707e442016-03-28 23:23:16 +08001379 }
Zhi Wang2707e442016-03-28 23:23:16 +08001380 ppgtt_set_post_shadow(spt, index);
Zhi Wang2707e442016-03-28 23:23:16 +08001381 }
1382
1383 if (!enable_out_of_sync)
1384 return 0;
1385
Changbin Du44b46732018-01-30 19:19:49 +08001386 spt->guest_page.write_cnt++;
Zhi Wang2707e442016-03-28 23:23:16 +08001387
Changbin Du44b46732018-01-30 19:19:49 +08001388 if (spt->guest_page.oos_page)
1389 ops->set_entry(spt->guest_page.oos_page->mem, &we, index,
Zhi Wang2707e442016-03-28 23:23:16 +08001390 false, 0, vgpu);
1391
Changbin Du44b46732018-01-30 19:19:49 +08001392 if (can_do_out_of_sync(spt)) {
1393 if (!spt->guest_page.oos_page)
1394 ppgtt_allocate_oos_page(spt);
Zhi Wang2707e442016-03-28 23:23:16 +08001395
Changbin Du44b46732018-01-30 19:19:49 +08001396 ret = ppgtt_set_guest_page_oos(spt);
Zhi Wang2707e442016-03-28 23:23:16 +08001397 if (ret < 0)
1398 return ret;
1399 }
1400 return 0;
1401}
1402
Changbin Duede9d0c2018-01-30 19:19:40 +08001403static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
Zhi Wang2707e442016-03-28 23:23:16 +08001404{
1405 struct intel_vgpu *vgpu = mm->vgpu;
1406 struct intel_gvt *gvt = vgpu->gvt;
1407 struct intel_gvt_gtt *gtt = &gvt->gtt;
1408 struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1409 struct intel_gvt_gtt_entry se;
Changbin Duede9d0c2018-01-30 19:19:40 +08001410 int index;
Zhi Wang2707e442016-03-28 23:23:16 +08001411
Changbin Duede9d0c2018-01-30 19:19:40 +08001412 if (!mm->ppgtt_mm.shadowed)
Zhi Wang2707e442016-03-28 23:23:16 +08001413 return;
1414
Changbin Duede9d0c2018-01-30 19:19:40 +08001415 for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
1416 ppgtt_get_shadow_root_entry(mm, &se, index);
1417
Zhi Wang2707e442016-03-28 23:23:16 +08001418 if (!ops->test_present(&se))
1419 continue;
Changbin Duede9d0c2018-01-30 19:19:40 +08001420
Changbin Dud87f5ff2018-01-30 19:19:50 +08001421 ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
Zhi Wang2707e442016-03-28 23:23:16 +08001422 se.val64 = 0;
Changbin Duede9d0c2018-01-30 19:19:40 +08001423 ppgtt_set_shadow_root_entry(mm, &se, index);
Zhi Wang2707e442016-03-28 23:23:16 +08001424
Changbin Du44b46732018-01-30 19:19:49 +08001425 trace_spt_guest_change(vgpu->id, "destroy root pointer",
1426 NULL, se.type, se.val64, index);
Zhi Wang2707e442016-03-28 23:23:16 +08001427 }
Changbin Duede9d0c2018-01-30 19:19:40 +08001428
1429 mm->ppgtt_mm.shadowed = false;
Zhi Wang2707e442016-03-28 23:23:16 +08001430}
 1431
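/*
 * Populate the shadow root table from the guest root table: every
 * present guest root entry gets a shadow page table tree built for
 * it. Called when the mm is created and again from
 * intel_vgpu_pin_mm() in case the shadow was torn down by reclaim.
 */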
Changbin Duede9d0c2018-01-30 19:19:40 +08001433static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
Zhi Wang2707e442016-03-28 23:23:16 +08001434{
1435 struct intel_vgpu *vgpu = mm->vgpu;
1436 struct intel_gvt *gvt = vgpu->gvt;
1437 struct intel_gvt_gtt *gtt = &gvt->gtt;
1438 struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1439 struct intel_vgpu_ppgtt_spt *spt;
1440 struct intel_gvt_gtt_entry ge, se;
Changbin Duede9d0c2018-01-30 19:19:40 +08001441 int index, ret;
Zhi Wang2707e442016-03-28 23:23:16 +08001442
Changbin Duede9d0c2018-01-30 19:19:40 +08001443 if (mm->ppgtt_mm.shadowed)
Zhi Wang2707e442016-03-28 23:23:16 +08001444 return 0;
1445
Changbin Duede9d0c2018-01-30 19:19:40 +08001446 mm->ppgtt_mm.shadowed = true;
Zhi Wang2707e442016-03-28 23:23:16 +08001447
Changbin Duede9d0c2018-01-30 19:19:40 +08001448 for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
1449 ppgtt_get_guest_root_entry(mm, &ge, index);
1450
Zhi Wang2707e442016-03-28 23:23:16 +08001451 if (!ops->test_present(&ge))
1452 continue;
1453
Changbin Du44b46732018-01-30 19:19:49 +08001454 trace_spt_guest_change(vgpu->id, __func__, NULL,
1455 ge.type, ge.val64, index);
Zhi Wang2707e442016-03-28 23:23:16 +08001456
Changbin Dud87f5ff2018-01-30 19:19:50 +08001457 spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
Zhi Wang2707e442016-03-28 23:23:16 +08001458 if (IS_ERR(spt)) {
Tina Zhang695fbc02017-03-10 04:26:53 -05001459 gvt_vgpu_err("fail to populate guest root pointer\n");
Zhi Wang2707e442016-03-28 23:23:16 +08001460 ret = PTR_ERR(spt);
1461 goto fail;
1462 }
1463 ppgtt_generate_shadow_entry(&se, spt, &ge);
Changbin Duede9d0c2018-01-30 19:19:40 +08001464 ppgtt_set_shadow_root_entry(mm, &se, index);
Zhi Wang2707e442016-03-28 23:23:16 +08001465
Changbin Du44b46732018-01-30 19:19:49 +08001466 trace_spt_guest_change(vgpu->id, "populate root pointer",
1467 NULL, se.type, se.val64, index);
Zhi Wang2707e442016-03-28 23:23:16 +08001468 }
Changbin Duede9d0c2018-01-30 19:19:40 +08001469
Zhi Wang2707e442016-03-28 23:23:16 +08001470 return 0;
1471fail:
Changbin Duede9d0c2018-01-30 19:19:40 +08001472 invalidate_ppgtt_mm(mm);
Zhi Wang2707e442016-03-28 23:23:16 +08001473 return ret;
1474}
1475
Changbin Duede9d0c2018-01-30 19:19:40 +08001476static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu)
1477{
1478 struct intel_vgpu_mm *mm;
1479
1480 mm = kzalloc(sizeof(*mm), GFP_KERNEL);
1481 if (!mm)
1482 return NULL;
1483
1484 mm->vgpu = vgpu;
1485 kref_init(&mm->ref);
1486 atomic_set(&mm->pincount, 0);
1487
1488 return mm;
1489}
1490
1491static void vgpu_free_mm(struct intel_vgpu_mm *mm)
1492{
1493 kfree(mm);
1494}
1495
Zhi Wang2707e442016-03-28 23:23:16 +08001496/**
Changbin Duede9d0c2018-01-30 19:19:40 +08001497 * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU
Zhi Wang2707e442016-03-28 23:23:16 +08001498 * @vgpu: a vGPU
Changbin Duede9d0c2018-01-30 19:19:40 +08001499 * @root_entry_type: ppgtt root entry type
1500 * @pdps: guest pdps.
Zhi Wang2707e442016-03-28 23:23:16 +08001501 *
Changbin Duede9d0c2018-01-30 19:19:40 +08001502 * This function is used to create a ppgtt mm object for a vGPU.
Zhi Wang2707e442016-03-28 23:23:16 +08001503 *
1504 * Returns:
 1505 * The ppgtt mm object on success, ERR_PTR() encoded error code if failed.
1506 */
Changbin Duede9d0c2018-01-30 19:19:40 +08001507struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
1508 intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
Zhi Wang2707e442016-03-28 23:23:16 +08001509{
1510 struct intel_gvt *gvt = vgpu->gvt;
Zhi Wang2707e442016-03-28 23:23:16 +08001511 struct intel_vgpu_mm *mm;
1512 int ret;
1513
Changbin Duede9d0c2018-01-30 19:19:40 +08001514 mm = vgpu_alloc_mm(vgpu);
1515 if (!mm)
1516 return ERR_PTR(-ENOMEM);
Zhi Wang2707e442016-03-28 23:23:16 +08001517
Changbin Duede9d0c2018-01-30 19:19:40 +08001518 mm->type = INTEL_GVT_MM_PPGTT;
Zhi Wang2707e442016-03-28 23:23:16 +08001519
Changbin Duede9d0c2018-01-30 19:19:40 +08001520 GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY &&
1521 root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY);
1522 mm->ppgtt_mm.root_entry_type = root_entry_type;
Zhi Wang2707e442016-03-28 23:23:16 +08001523
Changbin Duede9d0c2018-01-30 19:19:40 +08001524 INIT_LIST_HEAD(&mm->ppgtt_mm.list);
1525 INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
Zhi Wang2707e442016-03-28 23:23:16 +08001526
Changbin Duede9d0c2018-01-30 19:19:40 +08001527 if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
1528 mm->ppgtt_mm.guest_pdps[0] = pdps[0];
1529 else
1530 memcpy(mm->ppgtt_mm.guest_pdps, pdps,
1531 sizeof(mm->ppgtt_mm.guest_pdps));
Zhi Wang2707e442016-03-28 23:23:16 +08001532
Changbin Duede9d0c2018-01-30 19:19:40 +08001533 ret = shadow_ppgtt_mm(mm);
Zhi Wang2707e442016-03-28 23:23:16 +08001534 if (ret) {
Changbin Duede9d0c2018-01-30 19:19:40 +08001535 gvt_vgpu_err("failed to shadow ppgtt mm\n");
1536 vgpu_free_mm(mm);
1537 return ERR_PTR(ret);
Zhi Wang2707e442016-03-28 23:23:16 +08001538 }
1539
Changbin Duede9d0c2018-01-30 19:19:40 +08001540 list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);
1541 list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
Zhi Wang2707e442016-03-28 23:23:16 +08001542 return mm;
Changbin Duede9d0c2018-01-30 19:19:40 +08001543}
1544
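/*
 * The GGTT mm keeps no shadow page table tree: virtual_ggtt is a
 * vzalloc'ed array holding one guest entry per GGTT slot, which
 * serves guest reads, while writes are propagated to the host GGTT
 * (see emulate_ggtt_mmio_write() below).
 */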
1545static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
1546{
1547 struct intel_vgpu_mm *mm;
1548 unsigned long nr_entries;
1549
1550 mm = vgpu_alloc_mm(vgpu);
1551 if (!mm)
1552 return ERR_PTR(-ENOMEM);
1553
1554 mm->type = INTEL_GVT_MM_GGTT;
1555
1556 nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
1557 mm->ggtt_mm.virtual_ggtt = vzalloc(nr_entries *
1558 vgpu->gvt->device_info.gtt_entry_size);
1559 if (!mm->ggtt_mm.virtual_ggtt) {
1560 vgpu_free_mm(mm);
1561 return ERR_PTR(-ENOMEM);
1562 }
1563
1564 return mm;
1565}
1566
1567/**
Changbin Du1bc25852018-01-30 19:19:41 +08001568 * _intel_vgpu_mm_release - destroy a mm object
Changbin Duede9d0c2018-01-30 19:19:40 +08001569 * @mm_ref: a kref object
1570 *
 1571 * This function is used to destroy a mm object for a vGPU
1572 *
1573 */
Changbin Du1bc25852018-01-30 19:19:41 +08001574void _intel_vgpu_mm_release(struct kref *mm_ref)
Changbin Duede9d0c2018-01-30 19:19:40 +08001575{
1576 struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
1577
1578 if (GEM_WARN_ON(atomic_read(&mm->pincount)))
1579 gvt_err("vgpu mm pin count bug detected\n");
1580
1581 if (mm->type == INTEL_GVT_MM_PPGTT) {
1582 list_del(&mm->ppgtt_mm.list);
1583 list_del(&mm->ppgtt_mm.lru_list);
1584 invalidate_ppgtt_mm(mm);
1585 } else {
1586 vfree(mm->ggtt_mm.virtual_ggtt);
1587 }
1588
1589 vgpu_free_mm(mm);
Zhi Wang2707e442016-03-28 23:23:16 +08001590}
1591
1592/**
1593 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
1594 * @mm: a vGPU mm object
1595 *
 1596 * This function is called when the user no longer needs a vGPU mm object
1597 */
1598void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
1599{
Zhi Wang2707e442016-03-28 23:23:16 +08001600 atomic_dec(&mm->pincount);
1601}
1602
1603/**
1604 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
 1605 * @mm: target vGPU mm object
1606 *
 1607 * This function is called when the user wants to use a vGPU mm object. If this
1608 * mm object hasn't been shadowed yet, the shadow will be populated at this
1609 * time.
1610 *
1611 * Returns:
1612 * Zero on success, negative error code if failed.
1613 */
1614int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
1615{
1616 int ret;
1617
Changbin Duede9d0c2018-01-30 19:19:40 +08001618 atomic_inc(&mm->pincount);
Zhi Wang2707e442016-03-28 23:23:16 +08001619
Changbin Duede9d0c2018-01-30 19:19:40 +08001620 if (mm->type == INTEL_GVT_MM_PPGTT) {
1621 ret = shadow_ppgtt_mm(mm);
Zhi Wang2707e442016-03-28 23:23:16 +08001622 if (ret)
1623 return ret;
Changbin Duede9d0c2018-01-30 19:19:40 +08001624
1625 list_move_tail(&mm->ppgtt_mm.lru_list,
1626 &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
1627
Zhi Wang2707e442016-03-28 23:23:16 +08001628 }
1629
Zhi Wang2707e442016-03-28 23:23:16 +08001630 return 0;
1631}
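/*
 * A typical caller pairs pin/unpin around workload submission, e.g.
 * (illustrative sketch, assuming a workload with a shadow_mm field):
 *
 *	ret = intel_vgpu_pin_mm(workload->shadow_mm);
 *	if (ret)
 *		return ret;
 *	... dispatch the workload ...
 *	intel_vgpu_unpin_mm(workload->shadow_mm);
 *
 * A non-zero pin count keeps the mm safe from reclaim_one_ppgtt_mm()
 * below.
 */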
1632
Changbin Duede9d0c2018-01-30 19:19:40 +08001633static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
Zhi Wang2707e442016-03-28 23:23:16 +08001634{
1635 struct intel_vgpu_mm *mm;
1636 struct list_head *pos, *n;
1637
Changbin Duede9d0c2018-01-30 19:19:40 +08001638 list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
1639 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);
Zhi Wang2707e442016-03-28 23:23:16 +08001640
Zhi Wang2707e442016-03-28 23:23:16 +08001641 if (atomic_read(&mm->pincount))
1642 continue;
1643
Changbin Duede9d0c2018-01-30 19:19:40 +08001644 list_del_init(&mm->ppgtt_mm.lru_list);
1645 invalidate_ppgtt_mm(mm);
Zhi Wang2707e442016-03-28 23:23:16 +08001646 return 1;
1647 }
1648 return 0;
1649}
1650
1651/*
1652 * GMA translation APIs.
1653 */
1654static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
1655 struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
1656{
1657 struct intel_vgpu *vgpu = mm->vgpu;
1658 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1659 struct intel_vgpu_ppgtt_spt *s;
1660
Changbin Du44b46732018-01-30 19:19:49 +08001661 s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
Zhi Wang2707e442016-03-28 23:23:16 +08001662 if (!s)
1663 return -ENXIO;
1664
1665 if (!guest)
1666 ppgtt_get_shadow_entry(s, e, index);
1667 else
1668 ppgtt_get_guest_entry(s, e, index);
1669 return 0;
1670}
1671
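/*
 * Index decomposition used by the translation below (gen8-style
 * 4-level walk): pml4 = gma[47:39], pdp = gma[38:30],
 * pde = gma[29:21], pte = gma[20:12]. Only the final level is read
 * from the guest page table, so the result is a guest physical
 * address.
 */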
1672/**
1673 * intel_vgpu_gma_to_gpa - translate a gma to GPA
1674 * @mm: mm object. could be a PPGTT or GGTT mm object
1675 * @gma: graphics memory address in this mm object
1676 *
 1677 * This function is used to translate a graphics memory address in a specific
 1678 * graphics memory space to a guest physical address.
1679 *
1680 * Returns:
1681 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
1682 */
1683unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
1684{
1685 struct intel_vgpu *vgpu = mm->vgpu;
1686 struct intel_gvt *gvt = vgpu->gvt;
1687 struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
1688 struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
1689 unsigned long gpa = INTEL_GVT_INVALID_ADDR;
1690 unsigned long gma_index[4];
1691 struct intel_gvt_gtt_entry e;
Changbin Duede9d0c2018-01-30 19:19:40 +08001692 int i, levels = 0;
Zhi Wang2707e442016-03-28 23:23:16 +08001693 int ret;
1694
Changbin Duede9d0c2018-01-30 19:19:40 +08001695 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT &&
1696 mm->type != INTEL_GVT_MM_PPGTT);
Zhi Wang2707e442016-03-28 23:23:16 +08001697
1698 if (mm->type == INTEL_GVT_MM_GGTT) {
1699 if (!vgpu_gmadr_is_valid(vgpu, gma))
1700 goto err;
1701
Changbin Duede9d0c2018-01-30 19:19:40 +08001702 ggtt_get_guest_entry(mm, &e,
1703 gma_ops->gma_to_ggtt_pte_index(gma));
1704
Zhi Wang9556e112017-10-10 13:51:32 +08001705 gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
1706 + (gma & ~I915_GTT_PAGE_MASK);
Zhi Wang2707e442016-03-28 23:23:16 +08001707
1708 trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
Changbin Duede9d0c2018-01-30 19:19:40 +08001709 } else {
1710 switch (mm->ppgtt_mm.root_entry_type) {
1711 case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
1712 ppgtt_get_shadow_root_entry(mm, &e, 0);
Zhi Wang2707e442016-03-28 23:23:16 +08001713
Changbin Duede9d0c2018-01-30 19:19:40 +08001714 gma_index[0] = gma_ops->gma_to_pml4_index(gma);
1715 gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
1716 gma_index[2] = gma_ops->gma_to_pde_index(gma);
1717 gma_index[3] = gma_ops->gma_to_pte_index(gma);
1718 levels = 4;
1719 break;
1720 case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
1721 ppgtt_get_shadow_root_entry(mm, &e,
1722 gma_ops->gma_to_l3_pdp_index(gma));
Zhi Wang2707e442016-03-28 23:23:16 +08001723
Changbin Duede9d0c2018-01-30 19:19:40 +08001724 gma_index[0] = gma_ops->gma_to_pde_index(gma);
1725 gma_index[1] = gma_ops->gma_to_pte_index(gma);
1726 levels = 2;
1727 break;
1728 default:
1729 GEM_BUG_ON(1);
Changbin Du4b2dbbc2017-08-02 15:06:37 +08001730 }
Changbin Duede9d0c2018-01-30 19:19:40 +08001731
1732 /* walk the shadow page table and get gpa from guest entry */
1733 for (i = 0; i < levels; i++) {
1734 ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
1735 (i == levels - 1));
1736 if (ret)
1737 goto err;
1738
1739 if (!pte_ops->test_present(&e)) {
1740 gvt_dbg_core("GMA 0x%lx is not present\n", gma);
1741 goto err;
1742 }
1743 }
1744
1745 gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) +
1746 (gma & ~I915_GTT_PAGE_MASK);
1747 trace_gma_translate(vgpu->id, "ppgtt", 0,
1748 mm->ppgtt_mm.root_entry_type, gma, gpa);
Zhi Wang2707e442016-03-28 23:23:16 +08001749 }
1750
Zhi Wang2707e442016-03-28 23:23:16 +08001751 return gpa;
1752err:
Tina Zhang695fbc02017-03-10 04:26:53 -05001753 gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
Zhi Wang2707e442016-03-28 23:23:16 +08001754 return INTEL_GVT_INVALID_ADDR;
1755}
1756
Changbin Dua143cef2018-01-30 19:19:45 +08001757static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
Zhi Wang2707e442016-03-28 23:23:16 +08001758 unsigned int off, void *p_data, unsigned int bytes)
1759{
1760 struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
1761 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1762 unsigned long index = off >> info->gtt_entry_size_shift;
1763 struct intel_gvt_gtt_entry e;
1764
1765 if (bytes != 4 && bytes != 8)
1766 return -EINVAL;
1767
1768 ggtt_get_guest_entry(ggtt_mm, &e, index);
1769 memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
1770 bytes);
1771 return 0;
1772}
1773
1774/**
 1775 * intel_vgpu_emulate_ggtt_mmio_read - emulate GGTT MMIO register read
1776 * @vgpu: a vGPU
1777 * @off: register offset
 1778 * @p_data: data to be returned to the guest
1779 * @bytes: data length
1780 *
 1781 * This function is used to emulate the GGTT MMIO register read
1782 *
1783 * Returns:
1784 * Zero on success, error code if failed.
1785 */
Changbin Dua143cef2018-01-30 19:19:45 +08001786int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
Zhi Wang2707e442016-03-28 23:23:16 +08001787 void *p_data, unsigned int bytes)
1788{
1789 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1790 int ret;
1791
1792 if (bytes != 4 && bytes != 8)
1793 return -EINVAL;
1794
1795 off -= info->gtt_start_offset;
Changbin Dua143cef2018-01-30 19:19:45 +08001796 ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes);
Zhi Wang2707e442016-03-28 23:23:16 +08001797 return ret;
1798}
1799
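/*
 * Shadow one guest GGTT PTE write: translate the guest page frame to
 * a host mfn and point the host entry at it, falling back to the
 * scratch page while the gfn is invalid (e.g. in the middle of a
 * partial update) or when the translation fails.
 */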
Changbin Dua143cef2018-01-30 19:19:45 +08001800static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
Zhi Wang2707e442016-03-28 23:23:16 +08001801 void *p_data, unsigned int bytes)
1802{
1803 struct intel_gvt *gvt = vgpu->gvt;
1804 const struct intel_gvt_device_info *info = &gvt->device_info;
1805 struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
1806 struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
1807 unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
Changbin Du72f03d72018-01-30 19:19:48 +08001808 unsigned long gma, gfn, mfn;
Zhi Wang2707e442016-03-28 23:23:16 +08001809 struct intel_gvt_gtt_entry e, m;
Zhi Wang2707e442016-03-28 23:23:16 +08001810
1811 if (bytes != 4 && bytes != 8)
1812 return -EINVAL;
1813
Zhi Wang9556e112017-10-10 13:51:32 +08001814 gma = g_gtt_index << I915_GTT_PAGE_SHIFT;
Zhi Wang2707e442016-03-28 23:23:16 +08001815
1816 /* the VM may configure the whole GM space when ballooning is used */
Zhao, Xinda7c281352017-02-21 15:54:56 +08001817 if (!vgpu_gmadr_is_valid(vgpu, gma))
Zhi Wang2707e442016-03-28 23:23:16 +08001818 return 0;
Zhi Wang2707e442016-03-28 23:23:16 +08001819
1820 ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index);
1821
1822 memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
1823 bytes);
Changbin Du72f03d72018-01-30 19:19:48 +08001824 m = e;
Zhi Wang2707e442016-03-28 23:23:16 +08001825
1826 if (ops->test_present(&e)) {
Hang Yuancc753fb2017-12-22 18:06:31 +08001827 gfn = ops->get_pfn(&e);
1828
 1829		/* One PTE update may be issued in multiple writes, and the
 1830		 * first write may not yet form a valid gfn
1831 */
1832 if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) {
1833 ops->set_pfn(&m, gvt->gtt.scratch_mfn);
1834 goto out;
1835 }
1836
Changbin Du72f03d72018-01-30 19:19:48 +08001837 mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn);
1838 if (mfn == INTEL_GVT_INVALID_ADDR) {
1839 gvt_vgpu_err("fail to populate guest ggtt entry\n");
Xiaoguang Chen359b6932017-03-21 10:54:21 +08001840			/* The guest driver may read/write the entry while only
 1841			 * partially updating it; the p2m translation fails in that
 1842			 * case, so set the shadow entry to point to a scratch page
1843 */
Zhi Wang22115ce2017-10-10 14:34:11 +08001844 ops->set_pfn(&m, gvt->gtt.scratch_mfn);
Changbin Du72f03d72018-01-30 19:19:48 +08001845 } else
1846 ops->set_pfn(&m, mfn);
1847 } else
Zhi Wang22115ce2017-10-10 14:34:11 +08001848 ops->set_pfn(&m, gvt->gtt.scratch_mfn);
Zhi Wang2707e442016-03-28 23:23:16 +08001849
Hang Yuancc753fb2017-12-22 18:06:31 +08001850out:
Changbin Du3aff3512018-01-30 19:19:42 +08001851 ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
Changbin Dua143cef2018-01-30 19:19:45 +08001852 ggtt_invalidate(gvt->dev_priv);
Zhi Wang2707e442016-03-28 23:23:16 +08001853 ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
1854 return 0;
1855}
1856
 1857/**
Changbin Dua143cef2018-01-30 19:19:45 +08001858 * intel_vgpu_emulate_ggtt_mmio_write - emulate GGTT MMIO register write
Zhi Wang2707e442016-03-28 23:23:16 +08001859 * @vgpu: a vGPU
1860 * @off: register offset
1861 * @p_data: data from guest write
1862 * @bytes: data length
1863 *
 1864 * This function is used to emulate the GGTT MMIO register write
1865 *
1866 * Returns:
1867 * Zero on success, error code if failed.
1868 */
Changbin Dua143cef2018-01-30 19:19:45 +08001869int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
1870 unsigned int off, void *p_data, unsigned int bytes)
Zhi Wang2707e442016-03-28 23:23:16 +08001871{
1872 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1873 int ret;
1874
1875 if (bytes != 4 && bytes != 8)
1876 return -EINVAL;
1877
1878 off -= info->gtt_start_offset;
Changbin Dua143cef2018-01-30 19:19:45 +08001879 ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
Zhi Wang2707e442016-03-28 23:23:16 +08001880 return ret;
1881}
1882
Ping Gao3b6411c2016-11-04 13:47:35 +08001883static int alloc_scratch_pages(struct intel_vgpu *vgpu,
1884 intel_gvt_gtt_type_t type)
Zhi Wang2707e442016-03-28 23:23:16 +08001885{
1886 struct intel_vgpu_gtt *gtt = &vgpu->gtt;
Ping Gao3b6411c2016-11-04 13:47:35 +08001887 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
Zhenyu Wang5c352582017-11-02 17:44:52 +08001888 int page_entry_num = I915_GTT_PAGE_SIZE >>
Ping Gao3b6411c2016-11-04 13:47:35 +08001889 vgpu->gvt->device_info.gtt_entry_size_shift;
Jike Song96317392017-01-09 15:38:38 +08001890 void *scratch_pt;
Ping Gao3b6411c2016-11-04 13:47:35 +08001891 int i;
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08001892 struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
1893 dma_addr_t daddr;
Zhi Wang2707e442016-03-28 23:23:16 +08001894
Ping Gao3b6411c2016-11-04 13:47:35 +08001895 if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
1896 return -EINVAL;
1897
Jike Song96317392017-01-09 15:38:38 +08001898 scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
Ping Gao3b6411c2016-11-04 13:47:35 +08001899 if (!scratch_pt) {
Tina Zhang695fbc02017-03-10 04:26:53 -05001900 gvt_vgpu_err("fail to allocate scratch page\n");
Zhi Wang2707e442016-03-28 23:23:16 +08001901 return -ENOMEM;
1902 }
1903
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08001904 daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0,
1905 4096, PCI_DMA_BIDIRECTIONAL);
1906 if (dma_mapping_error(dev, daddr)) {
Tina Zhang695fbc02017-03-10 04:26:53 -05001907 gvt_vgpu_err("fail to dmamap scratch_pt\n");
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08001908 __free_page(virt_to_page(scratch_pt));
1909 return -ENOMEM;
Ping Gao3b6411c2016-11-04 13:47:35 +08001910 }
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08001911 gtt->scratch_pt[type].page_mfn =
Zhenyu Wang5c352582017-11-02 17:44:52 +08001912 (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
Jike Song96317392017-01-09 15:38:38 +08001913 gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
Ping Gao3b6411c2016-11-04 13:47:35 +08001914 gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08001915 vgpu->id, type, gtt->scratch_pt[type].page_mfn);
Ping Gao3b6411c2016-11-04 13:47:35 +08001916
1917 /* Build the tree by full filled the scratch pt with the entries which
1918 * point to the next level scratch pt or scratch page. The
1919 * scratch_pt[type] indicate the scratch pt/scratch page used by the
1920 * 'type' pt.
1921 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by
Jike Song96317392017-01-09 15:38:38 +08001922 * GTT_TYPE_PPGTT_PDE_PT level pt, that means this scratch_pt it self
Ping Gao3b6411c2016-11-04 13:47:35 +08001923 * is GTT_TYPE_PPGTT_PTE_PT, and full filled by scratch page mfn.
1924 */
1925 if (type > GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) {
1926 struct intel_gvt_gtt_entry se;
1927
1928 memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
1929 se.type = get_entry_type(type - 1);
1930 ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);
1931
 1932		/* The entry parameters like present/writeable/cache type are
 1933		 * set to the same values as in i915's scratch page tree.
1934 */
1935 se.val64 |= _PAGE_PRESENT | _PAGE_RW;
1936 if (type == GTT_TYPE_PPGTT_PDE_PT)
Zhi Wangc095b972017-09-14 20:39:41 +08001937 se.val64 |= PPAT_CACHED;
Ping Gao3b6411c2016-11-04 13:47:35 +08001938
1939 for (i = 0; i < page_entry_num; i++)
Jike Song96317392017-01-09 15:38:38 +08001940 ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
Zhi Wang2707e442016-03-28 23:23:16 +08001941 }
1942
Zhi Wang2707e442016-03-28 23:23:16 +08001943 return 0;
1944}
1945
Ping Gao3b6411c2016-11-04 13:47:35 +08001946static int release_scratch_page_tree(struct intel_vgpu *vgpu)
Zhi Wang2707e442016-03-28 23:23:16 +08001947{
Ping Gao3b6411c2016-11-04 13:47:35 +08001948 int i;
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08001949 struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
1950 dma_addr_t daddr;
Ping Gao3b6411c2016-11-04 13:47:35 +08001951
1952 for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
1953 if (vgpu->gtt.scratch_pt[i].page != NULL) {
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08001954 daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
Zhenyu Wang5c352582017-11-02 17:44:52 +08001955 I915_GTT_PAGE_SHIFT);
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08001956 dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
Ping Gao3b6411c2016-11-04 13:47:35 +08001957 __free_page(vgpu->gtt.scratch_pt[i].page);
1958 vgpu->gtt.scratch_pt[i].page = NULL;
1959 vgpu->gtt.scratch_pt[i].page_mfn = 0;
1960 }
Zhi Wang2707e442016-03-28 23:23:16 +08001961 }
Ping Gao3b6411c2016-11-04 13:47:35 +08001962
1963 return 0;
1964}
1965
1966static int create_scratch_page_tree(struct intel_vgpu *vgpu)
1967{
1968 int i, ret;
1969
1970 for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
1971 ret = alloc_scratch_pages(vgpu, i);
1972 if (ret)
1973 goto err;
1974 }
1975
1976 return 0;
1977
1978err:
1979 release_scratch_page_tree(vgpu);
1980 return ret;
Zhi Wang2707e442016-03-28 23:23:16 +08001981}
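/*
 * Resulting scratch hierarchy (sketch): each higher-level scratch
 * table is filled with present entries pointing at the scratch table
 * of the level below it, while the lowest (PTE-level) scratch table
 * is left zeroed, so a vanished guest entry can always be shadowed by
 * the matching scratch table.
 */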
1982
1983/**
 1984 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
1985 * @vgpu: a vGPU
1986 *
1987 * This function is used to initialize per-vGPU graphics memory virtualization
1988 * components.
1989 *
1990 * Returns:
1991 * Zero on success, error code if failed.
1992 */
1993int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
1994{
1995 struct intel_vgpu_gtt *gtt = &vgpu->gtt;
Zhi Wang2707e442016-03-28 23:23:16 +08001996
Changbin Dud87f5ff2018-01-30 19:19:50 +08001997 hash_init(gtt->spt_hash_table);
Zhi Wang2707e442016-03-28 23:23:16 +08001998
Changbin Duede9d0c2018-01-30 19:19:40 +08001999 INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
Zhi Wang2707e442016-03-28 23:23:16 +08002000 INIT_LIST_HEAD(&gtt->oos_page_list_head);
2001 INIT_LIST_HEAD(&gtt->post_shadow_list_head);
2002
Changbin Duede9d0c2018-01-30 19:19:40 +08002003 gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
2004 if (IS_ERR(gtt->ggtt_mm)) {
Tina Zhang695fbc02017-03-10 04:26:53 -05002005 gvt_vgpu_err("fail to create mm for ggtt.\n");
Changbin Duede9d0c2018-01-30 19:19:40 +08002006 return PTR_ERR(gtt->ggtt_mm);
Zhi Wang2707e442016-03-28 23:23:16 +08002007 }
2008
Changbin Duede9d0c2018-01-30 19:19:40 +08002009 intel_vgpu_reset_ggtt(vgpu);
Zhi Wang2707e442016-03-28 23:23:16 +08002010
Ping Gao3b6411c2016-11-04 13:47:35 +08002011 return create_scratch_page_tree(vgpu);
Zhi Wang2707e442016-03-28 23:23:16 +08002012}
2013
Changbin Duede9d0c2018-01-30 19:19:40 +08002014static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
Ping Gaoda9cc8d2017-02-21 15:52:56 +08002015{
2016 struct list_head *pos, *n;
2017 struct intel_vgpu_mm *mm;
2018
Changbin Duede9d0c2018-01-30 19:19:40 +08002019 list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
2020 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
Changbin Du1bc25852018-01-30 19:19:41 +08002021 intel_vgpu_destroy_mm(mm);
Ping Gaoda9cc8d2017-02-21 15:52:56 +08002022 }
Changbin Duede9d0c2018-01-30 19:19:40 +08002023
2024 if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
 2025		gvt_err("vgpu ppgtt mm is not fully destroyed\n");
2026
Changbin Dud87f5ff2018-01-30 19:19:50 +08002027 if (GEM_WARN_ON(!hlist_empty(vgpu->gtt.spt_hash_table))) {
Changbin Duede9d0c2018-01-30 19:19:40 +08002028		gvt_err("why do we still have spt not freed?\n");
Changbin Dud87f5ff2018-01-30 19:19:50 +08002029 ppgtt_free_all_spt(vgpu);
Changbin Duede9d0c2018-01-30 19:19:40 +08002030 }
2031}
2032
2033static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
2034{
Changbin Du1bc25852018-01-30 19:19:41 +08002035 intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
Changbin Duede9d0c2018-01-30 19:19:40 +08002036 vgpu->gtt.ggtt_mm = NULL;
Ping Gaoda9cc8d2017-02-21 15:52:56 +08002037}
2038
Zhi Wang2707e442016-03-28 23:23:16 +08002039/**
 2040 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
2041 * @vgpu: a vGPU
2042 *
2043 * This function is used to clean up per-vGPU graphics memory virtualization
2044 * components.
2048 */
2049void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
2050{
Changbin Duede9d0c2018-01-30 19:19:40 +08002051 intel_vgpu_destroy_all_ppgtt_mm(vgpu);
2052 intel_vgpu_destroy_ggtt_mm(vgpu);
Ping Gao3b6411c2016-11-04 13:47:35 +08002053 release_scratch_page_tree(vgpu);
Zhi Wang2707e442016-03-28 23:23:16 +08002054}
2055
2056static void clean_spt_oos(struct intel_gvt *gvt)
2057{
2058 struct intel_gvt_gtt *gtt = &gvt->gtt;
2059 struct list_head *pos, *n;
2060 struct intel_vgpu_oos_page *oos_page;
2061
2062 WARN(!list_empty(&gtt->oos_page_use_list_head),
2063 "someone is still using oos page\n");
2064
2065 list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
2066 oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
2067 list_del(&oos_page->list);
2068 kfree(oos_page);
2069 }
2070}
2071
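/*
 * Pre-allocate the global pool of out-of-sync pages
 * (preallocated_oos_pages, 8192 by default); each oos_page buffers
 * the contents of one guest PTE page while that page is out of sync.
 */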
2072static int setup_spt_oos(struct intel_gvt *gvt)
2073{
2074 struct intel_gvt_gtt *gtt = &gvt->gtt;
2075 struct intel_vgpu_oos_page *oos_page;
2076 int i;
2077 int ret;
2078
2079 INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
2080 INIT_LIST_HEAD(&gtt->oos_page_use_list_head);
2081
2082 for (i = 0; i < preallocated_oos_pages; i++) {
2083 oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
2084 if (!oos_page) {
Zhi Wang2707e442016-03-28 23:23:16 +08002085 ret = -ENOMEM;
2086 goto fail;
2087 }
2088
2089 INIT_LIST_HEAD(&oos_page->list);
2090 INIT_LIST_HEAD(&oos_page->vm_list);
2091 oos_page->id = i;
2092 list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
2093 }
2094
2095 gvt_dbg_mm("%d oos pages preallocated\n", i);
2096
2097 return 0;
2098fail:
2099 clean_spt_oos(gvt);
2100 return ret;
2101}
2102
2103/**
2104 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
2105 * @vgpu: a vGPU
 2106 * @pdps: guest pdps
2108 *
 2109 * This function is used to find a PPGTT mm object from the mm object pool
2110 *
2111 * Returns:
2112 * pointer to mm object on success, NULL if failed.
2113 */
2114struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
Changbin Duede9d0c2018-01-30 19:19:40 +08002115 u64 pdps[])
Zhi Wang2707e442016-03-28 23:23:16 +08002116{
Zhi Wang2707e442016-03-28 23:23:16 +08002117 struct intel_vgpu_mm *mm;
Changbin Duede9d0c2018-01-30 19:19:40 +08002118 struct list_head *pos;
Zhi Wang2707e442016-03-28 23:23:16 +08002119
Changbin Duede9d0c2018-01-30 19:19:40 +08002120 list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
2121 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
Zhi Wang2707e442016-03-28 23:23:16 +08002122
Changbin Duede9d0c2018-01-30 19:19:40 +08002123 switch (mm->ppgtt_mm.root_entry_type) {
2124 case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
2125 if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
Zhi Wang2707e442016-03-28 23:23:16 +08002126 return mm;
Changbin Duede9d0c2018-01-30 19:19:40 +08002127 break;
2128 case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
2129 if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
2130 sizeof(mm->ppgtt_mm.guest_pdps)))
Zhi Wang2707e442016-03-28 23:23:16 +08002131 return mm;
Changbin Duede9d0c2018-01-30 19:19:40 +08002132 break;
2133 default:
2134 GEM_BUG_ON(1);
Zhi Wang2707e442016-03-28 23:23:16 +08002135 }
2136 }
2137 return NULL;
2138}
2139
2140/**
Changbin Due6e9c462018-01-30 19:19:46 +08002141 * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object.
Zhi Wang2707e442016-03-28 23:23:16 +08002142 * @vgpu: a vGPU
Changbin Duede9d0c2018-01-30 19:19:40 +08002143 * @root_entry_type: ppgtt root entry type
2144 * @pdps: guest pdps
Zhi Wang2707e442016-03-28 23:23:16 +08002145 *
Changbin Due6e9c462018-01-30 19:19:46 +08002146 * This function is used to find or create a PPGTT mm object from a guest.
Zhi Wang2707e442016-03-28 23:23:16 +08002147 *
2148 * Returns:
 2149 * The ppgtt mm object on success, ERR_PTR() encoded error code if failed.
2150 */
Changbin Due6e9c462018-01-30 19:19:46 +08002151struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
Changbin Duede9d0c2018-01-30 19:19:40 +08002152 intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
Zhi Wang2707e442016-03-28 23:23:16 +08002153{
Zhi Wang2707e442016-03-28 23:23:16 +08002154 struct intel_vgpu_mm *mm;
2155
Changbin Duede9d0c2018-01-30 19:19:40 +08002156 mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
Zhi Wang2707e442016-03-28 23:23:16 +08002157 if (mm) {
Changbin Du1bc25852018-01-30 19:19:41 +08002158 intel_vgpu_mm_get(mm);
Zhi Wang2707e442016-03-28 23:23:16 +08002159 } else {
Changbin Duede9d0c2018-01-30 19:19:40 +08002160 mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
Changbin Due6e9c462018-01-30 19:19:46 +08002161 if (IS_ERR(mm))
Tina Zhang695fbc02017-03-10 04:26:53 -05002162 gvt_vgpu_err("fail to create mm\n");
Zhi Wang2707e442016-03-28 23:23:16 +08002163 }
Changbin Due6e9c462018-01-30 19:19:46 +08002164 return mm;
Zhi Wang2707e442016-03-28 23:23:16 +08002165}
2166
2167/**
Changbin Due6e9c462018-01-30 19:19:46 +08002168 * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object.
Zhi Wang2707e442016-03-28 23:23:16 +08002169 * @vgpu: a vGPU
Changbin Duede9d0c2018-01-30 19:19:40 +08002170 * @pdps: guest pdps
Zhi Wang2707e442016-03-28 23:23:16 +08002171 *
Changbin Due6e9c462018-01-30 19:19:46 +08002172 * This function is used to find a PPGTT mm object from a guest and destroy it.
Zhi Wang2707e442016-03-28 23:23:16 +08002173 *
2174 * Returns:
2175 * Zero on success, negative error code if failed.
2176 */
Changbin Due6e9c462018-01-30 19:19:46 +08002177int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[])
Zhi Wang2707e442016-03-28 23:23:16 +08002178{
Zhi Wang2707e442016-03-28 23:23:16 +08002179 struct intel_vgpu_mm *mm;
2180
Changbin Duede9d0c2018-01-30 19:19:40 +08002181 mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
Zhi Wang2707e442016-03-28 23:23:16 +08002182 if (!mm) {
Tina Zhang695fbc02017-03-10 04:26:53 -05002183 gvt_vgpu_err("fail to find ppgtt instance.\n");
Zhi Wang2707e442016-03-28 23:23:16 +08002184 return -EINVAL;
2185 }
Changbin Du1bc25852018-01-30 19:19:41 +08002186 intel_vgpu_mm_put(mm);
Zhi Wang2707e442016-03-28 23:23:16 +08002187 return 0;
2188}
2189
2190/**
2191 * intel_gvt_init_gtt - initialize mm components of a GVT device
2192 * @gvt: GVT device
2193 *
2194 * This function is called at the initialization stage, to initialize
2195 * the mm components of a GVT device.
2196 *
2197 * Returns:
2198 * zero on success, negative error code if failed.
2199 */
2200int intel_gvt_init_gtt(struct intel_gvt *gvt)
2201{
2202 int ret;
Jike Song96317392017-01-09 15:38:38 +08002203 void *page;
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08002204 struct device *dev = &gvt->dev_priv->drm.pdev->dev;
2205 dma_addr_t daddr;
Zhi Wang2707e442016-03-28 23:23:16 +08002206
2207 gvt_dbg_core("init gtt\n");
2208
Xu Hane3476c02017-03-29 10:13:59 +08002209 if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
2210 || IS_KABYLAKE(gvt->dev_priv)) {
Zhi Wang2707e442016-03-28 23:23:16 +08002211 gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
2212 gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
Zhi Wang2707e442016-03-28 23:23:16 +08002213 } else {
2214 return -ENODEV;
2215 }
2216
Jike Song96317392017-01-09 15:38:38 +08002217 page = (void *)get_zeroed_page(GFP_KERNEL);
2218 if (!page) {
Ping Gaod650ac02016-12-08 10:14:48 +08002219 gvt_err("fail to allocate scratch ggtt page\n");
2220 return -ENOMEM;
2221 }
2222
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08002223 daddr = dma_map_page(dev, virt_to_page(page), 0,
2224 4096, PCI_DMA_BIDIRECTIONAL);
2225 if (dma_mapping_error(dev, daddr)) {
2226 gvt_err("fail to dmamap scratch ggtt page\n");
2227 __free_page(virt_to_page(page));
2228 return -ENOMEM;
Ping Gaod650ac02016-12-08 10:14:48 +08002229 }
Zhi Wang22115ce2017-10-10 14:34:11 +08002230
2231 gvt->gtt.scratch_page = virt_to_page(page);
2232 gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
Ping Gaod650ac02016-12-08 10:14:48 +08002233
Zhi Wang2707e442016-03-28 23:23:16 +08002234 if (enable_out_of_sync) {
2235 ret = setup_spt_oos(gvt);
2236 if (ret) {
2237 gvt_err("fail to initialize SPT oos\n");
Zhou, Wenjia0de98702017-07-04 15:47:00 +08002238 dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
Zhi Wang22115ce2017-10-10 14:34:11 +08002239 __free_page(gvt->gtt.scratch_page);
Zhi Wang2707e442016-03-28 23:23:16 +08002240 return ret;
2241 }
2242 }
Changbin Duede9d0c2018-01-30 19:19:40 +08002243 INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
Zhi Wang2707e442016-03-28 23:23:16 +08002244 return 0;
2245}
2246
2247/**
2248 * intel_gvt_clean_gtt - clean up mm components of a GVT device
2249 * @gvt: GVT device
2250 *
2251 * This function is called at the driver unloading stage, to clean up the
 2252 * mm components of a GVT device.
2253 *
2254 */
2255void intel_gvt_clean_gtt(struct intel_gvt *gvt)
2256{
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08002257 struct device *dev = &gvt->dev_priv->drm.pdev->dev;
Zhi Wang22115ce2017-10-10 14:34:11 +08002258 dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn <<
Zhi Wang9556e112017-10-10 13:51:32 +08002259 I915_GTT_PAGE_SHIFT);
Chuanxiao Dong5de6bd42017-02-09 11:37:11 +08002260
2261 dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
2262
Zhi Wang22115ce2017-10-10 14:34:11 +08002263 __free_page(gvt->gtt.scratch_page);
Ping Gaod650ac02016-12-08 10:14:48 +08002264
Zhi Wang2707e442016-03-28 23:23:16 +08002265 if (enable_out_of_sync)
2266 clean_spt_oos(gvt);
2267}
Ping Gaod650ac02016-12-08 10:14:48 +08002268
2269/**
2270 * intel_vgpu_reset_ggtt - reset the GGTT entry
2271 * @vgpu: a vGPU
2272 *
 2273 * This function is called at the vGPU create stage and during vGPU
 2274 * reset, to reset all the GGTT entries.
2275 *
2276 */
2277void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
2278{
2279 struct intel_gvt *gvt = vgpu->gvt;
Zhenyu Wang5ad59bf2017-04-12 16:24:57 +08002280 struct drm_i915_private *dev_priv = gvt->dev_priv;
Changbin Dub0c766b2018-01-30 19:19:43 +08002281 struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
2282 struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE};
Ping Gaod650ac02016-12-08 10:14:48 +08002283 u32 index;
Ping Gaod650ac02016-12-08 10:14:48 +08002284 u32 num_entries;
Ping Gaod650ac02016-12-08 10:14:48 +08002285
Changbin Dub0c766b2018-01-30 19:19:43 +08002286 pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn);
2287 pte_ops->set_present(&entry);
Ping Gaod650ac02016-12-08 10:14:48 +08002288
2289 index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
2290 num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
Changbin Dub0c766b2018-01-30 19:19:43 +08002291 while (num_entries--)
2292 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
Ping Gaod650ac02016-12-08 10:14:48 +08002293
2294 index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
2295 num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
Changbin Dub0c766b2018-01-30 19:19:43 +08002296 while (num_entries--)
2297 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
Zhenyu Wang5ad59bf2017-04-12 16:24:57 +08002298
Changbin Dua143cef2018-01-30 19:19:45 +08002299 ggtt_invalidate(dev_priv);
Ping Gaod650ac02016-12-08 10:14:48 +08002300}
Changbin Dub6115812017-01-13 11:15:57 +08002301
2302/**
2303 * intel_vgpu_reset_gtt - reset the all GTT related status
2304 * @vgpu: a vGPU
Changbin Dub6115812017-01-13 11:15:57 +08002305 *
 2306 * This function is called from vfio core to reset all
2307 * GTT related status, including GGTT, PPGTT, scratch page.
2308 *
2309 */
Chuanxiao Dong4d3e67b2017-08-04 13:08:59 +08002310void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
Changbin Dub6115812017-01-13 11:15:57 +08002311{
Ping Gaoda9cc8d2017-02-21 15:52:56 +08002312 /* Shadow pages are only created when there is no page
2313 * table tracking data, so remove page tracking data after
2314 * removing the shadow pages.
2315 */
Changbin Duede9d0c2018-01-30 19:19:40 +08002316 intel_vgpu_destroy_all_ppgtt_mm(vgpu);
Changbin Dub6115812017-01-13 11:15:57 +08002317 intel_vgpu_reset_ggtt(vgpu);
Changbin Dub6115812017-01-13 11:15:57 +08002318}