1 | /* |
2 | * GTT virtualization |
3 | * |
4 | * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. |
5 | * |
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
7 | * copy of this software and associated documentation files (the "Software"), |
8 | * to deal in the Software without restriction, including without limitation |
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
10 | * and/or sell copies of the Software, and to permit persons to whom the |
11 | * Software is furnished to do so, subject to the following conditions: |
12 | * |
13 | * The above copyright notice and this permission notice (including the next |
14 | * paragraph) shall be included in all copies or substantial portions of the |
15 | * Software. |
16 | * |
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
23 | * SOFTWARE. |
24 | * |
25 | * Authors: |
26 | * Zhi Wang <zhi.a.wang@intel.com> |
27 | * Zhenyu Wang <zhenyuw@linux.intel.com> |
28 | * Xiao Zheng <xiao.zheng@intel.com> |
29 | * |
30 | * Contributors: |
31 | * Min He <min.he@intel.com> |
32 | * Bing Niu <bing.niu@intel.com> |
33 | * |
34 | */ |
35 | |
36 | #include "i915_drv.h" |
37 | #include "gvt.h" |
38 | #include "i915_pvinfo.h" |
39 | #include "trace.h" |
40 | |
41 | #include "gt/intel_gt_regs.h" |
42 | |
43 | #if defined(VERBOSE_DEBUG) |
44 | #define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args) |
45 | #else |
46 | #define gvt_vdbg_mm(fmt, args...) |
47 | #endif |
48 | |
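/*
 * Knobs for the out-of-sync optimization: frequently written guest PTE
 * pages can be taken out of write protection and re-synced in one go
 * before the next workload submission (see the oos helpers below).
 */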
49 | static bool enable_out_of_sync = false; |
50 | static int preallocated_oos_pages = 8192; |
51 | |
52 | /* |
53 | * validate a gm address and related range size, |
54 | * translate it to host gm address |
55 | */ |
56 | bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size) |
57 | { |
58 | if (size == 0) |
59 | return vgpu_gmadr_is_valid(vgpu, addr); |
60 | |
61 | if (vgpu_gmadr_is_aperture(vgpu, addr) && |
62 | vgpu_gmadr_is_aperture(vgpu, addr + size - 1)) |
63 | return true; |
64 | else if (vgpu_gmadr_is_hidden(vgpu, addr) && |
65 | vgpu_gmadr_is_hidden(vgpu, addr + size - 1)) |
66 | return true; |
67 | |
68 | gvt_dbg_mm("Invalid ggtt range at 0x%llx, size: 0x%x\n" , |
69 | addr, size); |
70 | return false; |
71 | } |
72 | |
73 | /* translate a guest gmadr to host gmadr */ |
74 | int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr) |
75 | { |
76 | struct drm_i915_private *i915 = vgpu->gvt->gt->i915; |
77 | |
78 | if (drm_WARN(&i915->drm, !vgpu_gmadr_is_valid(vgpu, g_addr), |
79 | "invalid guest gmadr %llx\n" , g_addr)) |
80 | return -EACCES; |
81 | |
82 | if (vgpu_gmadr_is_aperture(vgpu, g_addr)) |
83 | *h_addr = vgpu_aperture_gmadr_base(vgpu) |
84 | + (g_addr - vgpu_aperture_offset(vgpu)); |
85 | else |
86 | *h_addr = vgpu_hidden_gmadr_base(vgpu) |
87 | + (g_addr - vgpu_hidden_offset(vgpu)); |
88 | return 0; |
89 | } |
90 | |
91 | /* translate a host gmadr to guest gmadr */ |
92 | int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr) |
93 | { |
94 | struct drm_i915_private *i915 = vgpu->gvt->gt->i915; |
95 | |
96 | if (drm_WARN(&i915->drm, !gvt_gmadr_is_valid(vgpu->gvt, h_addr), |
97 | "invalid host gmadr %llx\n" , h_addr)) |
98 | return -EACCES; |
99 | |
100 | if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr)) |
101 | *g_addr = vgpu_aperture_gmadr_base(vgpu) |
102 | + (h_addr - gvt_aperture_gmadr_base(vgpu->gvt)); |
103 | else |
104 | *g_addr = vgpu_hidden_gmadr_base(vgpu) |
105 | + (h_addr - gvt_hidden_gmadr_base(vgpu->gvt)); |
106 | return 0; |
107 | } |
108 | |
109 | int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index, |
110 | unsigned long *h_index) |
111 | { |
112 | u64 h_addr; |
113 | int ret; |
114 | |
	ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << I915_GTT_PAGE_SHIFT,
				       &h_addr);
117 | if (ret) |
118 | return ret; |
119 | |
120 | *h_index = h_addr >> I915_GTT_PAGE_SHIFT; |
121 | return 0; |
122 | } |
123 | |
124 | int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index, |
125 | unsigned long *g_index) |
126 | { |
127 | u64 g_addr; |
128 | int ret; |
129 | |
	ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << I915_GTT_PAGE_SHIFT,
				       &g_addr);
132 | if (ret) |
133 | return ret; |
134 | |
135 | *g_index = g_addr >> I915_GTT_PAGE_SHIFT; |
136 | return 0; |
137 | } |
138 | |
139 | #define gtt_type_is_entry(type) \ |
140 | (type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \ |
141 | && type != GTT_TYPE_PPGTT_PTE_ENTRY \ |
142 | && type != GTT_TYPE_PPGTT_ROOT_ENTRY) |
143 | |
144 | #define gtt_type_is_pt(type) \ |
145 | (type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) |
146 | |
147 | #define gtt_type_is_pte_pt(type) \ |
148 | (type == GTT_TYPE_PPGTT_PTE_PT) |
149 | |
150 | #define gtt_type_is_root_pointer(type) \ |
151 | (gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY) |
152 | |
153 | #define gtt_init_entry(e, t, p, v) do { \ |
154 | (e)->type = t; \ |
155 | (e)->pdev = p; \ |
156 | memcpy(&(e)->val64, &v, sizeof(v)); \ |
157 | } while (0) |
158 | |
159 | /* |
160 | * Mappings between GTT_TYPE* enumerations. |
161 | * Following information can be found according to the given type: |
162 | * - type of next level page table |
163 | * - type of entry inside this level page table |
164 | * - type of entry with PSE set |
165 | * |
166 | * If the given type doesn't have such a kind of information, |
167 | * e.g. give a l4 root entry type, then request to get its PSE type, |
168 | * give a PTE page table type, then request to get its next level page |
169 | * table type, as we know l4 root entry doesn't have a PSE bit, |
170 | * and a PTE page table doesn't have a next level page table type, |
171 | * GTT_TYPE_INVALID will be returned. This is useful when traversing a |
172 | * page table. |
173 | */ |
174 | |
175 | struct gtt_type_table_entry { |
176 | int entry_type; |
177 | int pt_type; |
178 | int next_pt_type; |
179 | int pse_entry_type; |
180 | }; |
181 | |
182 | #define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \ |
183 | [type] = { \ |
184 | .entry_type = e_type, \ |
185 | .pt_type = cpt_type, \ |
186 | .next_pt_type = npt_type, \ |
187 | .pse_entry_type = pse_type, \ |
188 | } |
189 | |
190 | static const struct gtt_type_table_entry gtt_type_table[] = { |
191 | GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY, |
192 | GTT_TYPE_PPGTT_ROOT_L4_ENTRY, |
193 | GTT_TYPE_INVALID, |
194 | GTT_TYPE_PPGTT_PML4_PT, |
195 | GTT_TYPE_INVALID), |
196 | GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT, |
197 | GTT_TYPE_PPGTT_PML4_ENTRY, |
198 | GTT_TYPE_PPGTT_PML4_PT, |
199 | GTT_TYPE_PPGTT_PDP_PT, |
200 | GTT_TYPE_INVALID), |
201 | GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY, |
202 | GTT_TYPE_PPGTT_PML4_ENTRY, |
203 | GTT_TYPE_PPGTT_PML4_PT, |
204 | GTT_TYPE_PPGTT_PDP_PT, |
205 | GTT_TYPE_INVALID), |
206 | GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT, |
207 | GTT_TYPE_PPGTT_PDP_ENTRY, |
208 | GTT_TYPE_PPGTT_PDP_PT, |
209 | GTT_TYPE_PPGTT_PDE_PT, |
210 | GTT_TYPE_PPGTT_PTE_1G_ENTRY), |
211 | GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY, |
212 | GTT_TYPE_PPGTT_ROOT_L3_ENTRY, |
213 | GTT_TYPE_INVALID, |
214 | GTT_TYPE_PPGTT_PDE_PT, |
215 | GTT_TYPE_PPGTT_PTE_1G_ENTRY), |
216 | GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY, |
217 | GTT_TYPE_PPGTT_PDP_ENTRY, |
218 | GTT_TYPE_PPGTT_PDP_PT, |
219 | GTT_TYPE_PPGTT_PDE_PT, |
220 | GTT_TYPE_PPGTT_PTE_1G_ENTRY), |
221 | GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT, |
222 | GTT_TYPE_PPGTT_PDE_ENTRY, |
223 | GTT_TYPE_PPGTT_PDE_PT, |
224 | GTT_TYPE_PPGTT_PTE_PT, |
225 | GTT_TYPE_PPGTT_PTE_2M_ENTRY), |
226 | GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY, |
227 | GTT_TYPE_PPGTT_PDE_ENTRY, |
228 | GTT_TYPE_PPGTT_PDE_PT, |
229 | GTT_TYPE_PPGTT_PTE_PT, |
230 | GTT_TYPE_PPGTT_PTE_2M_ENTRY), |
	/* We treat the IPS bit as 'PSE' for the PTE level. */
232 | GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT, |
233 | GTT_TYPE_PPGTT_PTE_4K_ENTRY, |
234 | GTT_TYPE_PPGTT_PTE_PT, |
235 | GTT_TYPE_INVALID, |
236 | GTT_TYPE_PPGTT_PTE_64K_ENTRY), |
237 | GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY, |
238 | GTT_TYPE_PPGTT_PTE_4K_ENTRY, |
239 | GTT_TYPE_PPGTT_PTE_PT, |
240 | GTT_TYPE_INVALID, |
241 | GTT_TYPE_PPGTT_PTE_64K_ENTRY), |
242 | GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY, |
243 | GTT_TYPE_PPGTT_PTE_4K_ENTRY, |
244 | GTT_TYPE_PPGTT_PTE_PT, |
245 | GTT_TYPE_INVALID, |
246 | GTT_TYPE_PPGTT_PTE_64K_ENTRY), |
247 | GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY, |
248 | GTT_TYPE_PPGTT_PDE_ENTRY, |
249 | GTT_TYPE_PPGTT_PDE_PT, |
250 | GTT_TYPE_INVALID, |
251 | GTT_TYPE_PPGTT_PTE_2M_ENTRY), |
252 | GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY, |
253 | GTT_TYPE_PPGTT_PDP_ENTRY, |
254 | GTT_TYPE_PPGTT_PDP_PT, |
255 | GTT_TYPE_INVALID, |
256 | GTT_TYPE_PPGTT_PTE_1G_ENTRY), |
257 | GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE, |
258 | GTT_TYPE_GGTT_PTE, |
259 | GTT_TYPE_INVALID, |
260 | GTT_TYPE_INVALID, |
261 | GTT_TYPE_INVALID), |
262 | }; |
263 | |
264 | static inline int get_next_pt_type(int type) |
265 | { |
266 | return gtt_type_table[type].next_pt_type; |
267 | } |
268 | |
269 | static inline int get_entry_type(int type) |
270 | { |
271 | return gtt_type_table[type].entry_type; |
272 | } |
273 | |
274 | static inline int get_pse_type(int type) |
275 | { |
276 | return gtt_type_table[type].pse_entry_type; |
277 | } |
278 | |
279 | static u64 read_pte64(struct i915_ggtt *ggtt, unsigned long index) |
280 | { |
281 | void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index; |
282 | |
283 | return readq(addr); |
284 | } |
285 | |
286 | static void ggtt_invalidate(struct intel_gt *gt) |
287 | { |
288 | mmio_hw_access_pre(gt); |
	intel_uncore_write(gt->uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
290 | mmio_hw_access_post(gt); |
291 | } |
292 | |
293 | static void write_pte64(struct i915_ggtt *ggtt, unsigned long index, u64 pte) |
294 | { |
295 | void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index; |
296 | |
	writeq(pte, addr);
298 | } |
299 | |
300 | static inline int gtt_get_entry64(void *pt, |
301 | struct intel_gvt_gtt_entry *e, |
302 | unsigned long index, bool hypervisor_access, unsigned long gpa, |
303 | struct intel_vgpu *vgpu) |
304 | { |
305 | const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; |
306 | int ret; |
307 | |
308 | if (WARN_ON(info->gtt_entry_size != 8)) |
309 | return -EINVAL; |
310 | |
311 | if (hypervisor_access) { |
		ret = intel_gvt_read_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
315 | if (WARN_ON(ret)) |
316 | return ret; |
317 | } else if (!pt) { |
318 | e->val64 = read_pte64(ggtt: vgpu->gvt->gt->ggtt, index); |
319 | } else { |
320 | e->val64 = *((u64 *)pt + index); |
321 | } |
322 | return 0; |
323 | } |
324 | |
325 | static inline int gtt_set_entry64(void *pt, |
326 | struct intel_gvt_gtt_entry *e, |
327 | unsigned long index, bool hypervisor_access, unsigned long gpa, |
328 | struct intel_vgpu *vgpu) |
329 | { |
330 | const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; |
331 | int ret; |
332 | |
333 | if (WARN_ON(info->gtt_entry_size != 8)) |
334 | return -EINVAL; |
335 | |
336 | if (hypervisor_access) { |
		ret = intel_gvt_write_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
340 | if (WARN_ON(ret)) |
341 | return ret; |
342 | } else if (!pt) { |
		write_pte64(vgpu->gvt->gt->ggtt, index, e->val64);
344 | } else { |
345 | *((u64 *)pt + index) = e->val64; |
346 | } |
347 | return 0; |
348 | } |
349 | |
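/*
 * 46-bit address width; the ADDR_*_MASK macros below select the
 * physical-address bits of an entry for each supported page size.
 */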
350 | #define GTT_HAW 46 |
351 | |
352 | #define ADDR_1G_MASK GENMASK_ULL(GTT_HAW - 1, 30) |
353 | #define ADDR_2M_MASK GENMASK_ULL(GTT_HAW - 1, 21) |
354 | #define ADDR_64K_MASK GENMASK_ULL(GTT_HAW - 1, 16) |
355 | #define ADDR_4K_MASK GENMASK_ULL(GTT_HAW - 1, 12) |
356 | |
357 | #define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52) |
#define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* split 64K gtt entry */
359 | |
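/* A 64K entry covers sixteen consecutive 4K PTE slots. */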
360 | #define GTT_64K_PTE_STRIDE 16 |
361 | |
362 | static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e) |
363 | { |
364 | unsigned long pfn; |
365 | |
366 | if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) |
367 | pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT; |
368 | else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) |
369 | pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT; |
370 | else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) |
371 | pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT; |
372 | else |
373 | pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT; |
374 | return pfn; |
375 | } |
376 | |
377 | static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn) |
378 | { |
379 | if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) { |
380 | e->val64 &= ~ADDR_1G_MASK; |
381 | pfn &= (ADDR_1G_MASK >> PAGE_SHIFT); |
382 | } else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) { |
383 | e->val64 &= ~ADDR_2M_MASK; |
384 | pfn &= (ADDR_2M_MASK >> PAGE_SHIFT); |
385 | } else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) { |
386 | e->val64 &= ~ADDR_64K_MASK; |
387 | pfn &= (ADDR_64K_MASK >> PAGE_SHIFT); |
388 | } else { |
389 | e->val64 &= ~ADDR_4K_MASK; |
390 | pfn &= (ADDR_4K_MASK >> PAGE_SHIFT); |
391 | } |
392 | |
393 | e->val64 |= (pfn << PAGE_SHIFT); |
394 | } |
395 | |
396 | static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e) |
397 | { |
398 | return !!(e->val64 & _PAGE_PSE); |
399 | } |
400 | |
401 | static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e) |
402 | { |
403 | if (gen8_gtt_test_pse(e)) { |
404 | switch (e->type) { |
405 | case GTT_TYPE_PPGTT_PTE_2M_ENTRY: |
406 | e->val64 &= ~_PAGE_PSE; |
407 | e->type = GTT_TYPE_PPGTT_PDE_ENTRY; |
408 | break; |
409 | case GTT_TYPE_PPGTT_PTE_1G_ENTRY: |
410 | e->type = GTT_TYPE_PPGTT_PDP_ENTRY; |
411 | e->val64 &= ~_PAGE_PSE; |
412 | break; |
413 | default: |
414 | WARN_ON(1); |
415 | } |
416 | } |
417 | } |
418 | |
419 | static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e) |
420 | { |
421 | if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY)) |
422 | return false; |
423 | |
424 | return !!(e->val64 & GEN8_PDE_IPS_64K); |
425 | } |
426 | |
427 | static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e) |
428 | { |
429 | if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY)) |
430 | return; |
431 | |
432 | e->val64 &= ~GEN8_PDE_IPS_64K; |
433 | } |
434 | |
435 | static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e) |
436 | { |
437 | /* |
438 | * i915 writes PDP root pointer registers without present bit, |
439 | * it also works, so we need to treat root pointer entry |
440 | * specifically. |
441 | */ |
442 | if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY |
443 | || e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) |
444 | return (e->val64 != 0); |
445 | else |
446 | return (e->val64 & GEN8_PAGE_PRESENT); |
447 | } |
448 | |
449 | static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e) |
450 | { |
451 | e->val64 &= ~GEN8_PAGE_PRESENT; |
452 | } |
453 | |
454 | static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e) |
455 | { |
456 | e->val64 |= GEN8_PAGE_PRESENT; |
457 | } |
458 | |
459 | static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e) |
460 | { |
461 | return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED); |
462 | } |
463 | |
464 | static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e) |
465 | { |
466 | e->val64 |= GTT_SPTE_FLAG_64K_SPLITED; |
467 | } |
468 | |
469 | static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e) |
470 | { |
471 | e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED; |
472 | } |
473 | |
474 | /* |
475 | * Per-platform GMA routines. |
476 | */ |
477 | static unsigned long gma_to_ggtt_pte_index(unsigned long gma) |
478 | { |
479 | unsigned long x = (gma >> I915_GTT_PAGE_SHIFT); |
480 | |
	trace_gma_index(__func__, gma, x);
482 | return x; |
483 | } |
484 | |
485 | #define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \ |
486 | static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \ |
487 | { \ |
488 | unsigned long x = (exp); \ |
489 | trace_gma_index(__func__, gma, x); \ |
490 | return x; \ |
491 | } |
492 | |
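/*
 * Gen8 walks decode 9 index bits of the GMA per level; a legacy 32b
 * (l3) PPGTT has only four PDPs, hence the 2-bit l3_pdp index.
 */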
493 | DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff)); |
494 | DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff)); |
495 | DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3)); |
496 | DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff)); |
497 | DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff)); |
498 | |
499 | static const struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = { |
500 | .get_entry = gtt_get_entry64, |
501 | .set_entry = gtt_set_entry64, |
502 | .clear_present = gtt_entry_clear_present, |
503 | .set_present = gtt_entry_set_present, |
504 | .test_present = gen8_gtt_test_present, |
505 | .test_pse = gen8_gtt_test_pse, |
506 | .clear_pse = gen8_gtt_clear_pse, |
507 | .clear_ips = gen8_gtt_clear_ips, |
508 | .test_ips = gen8_gtt_test_ips, |
509 | .clear_64k_splited = gen8_gtt_clear_64k_splited, |
510 | .set_64k_splited = gen8_gtt_set_64k_splited, |
511 | .test_64k_splited = gen8_gtt_test_64k_splited, |
512 | .get_pfn = gen8_gtt_get_pfn, |
513 | .set_pfn = gen8_gtt_set_pfn, |
514 | }; |
515 | |
516 | static const struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = { |
517 | .gma_to_ggtt_pte_index = gma_to_ggtt_pte_index, |
518 | .gma_to_pte_index = gen8_gma_to_pte_index, |
519 | .gma_to_pde_index = gen8_gma_to_pde_index, |
520 | .gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index, |
521 | .gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index, |
522 | .gma_to_pml4_index = gen8_gma_to_pml4_index, |
523 | }; |
524 | |
/* Update the entry type according to the PSE and IPS bits. */
526 | static void update_entry_type_for_real(const struct intel_gvt_gtt_pte_ops *pte_ops, |
527 | struct intel_gvt_gtt_entry *entry, bool ips) |
528 | { |
529 | switch (entry->type) { |
530 | case GTT_TYPE_PPGTT_PDE_ENTRY: |
531 | case GTT_TYPE_PPGTT_PDP_ENTRY: |
532 | if (pte_ops->test_pse(entry)) |
533 | entry->type = get_pse_type(type: entry->type); |
534 | break; |
535 | case GTT_TYPE_PPGTT_PTE_4K_ENTRY: |
536 | if (ips) |
537 | entry->type = get_pse_type(type: entry->type); |
538 | break; |
539 | default: |
540 | GEM_BUG_ON(!gtt_type_is_entry(entry->type)); |
541 | } |
542 | |
543 | GEM_BUG_ON(entry->type == GTT_TYPE_INVALID); |
544 | } |
545 | |
546 | /* |
547 | * MM helpers. |
548 | */ |
549 | static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm, |
550 | struct intel_gvt_gtt_entry *entry, unsigned long index, |
551 | bool guest) |
552 | { |
553 | const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; |
554 | |
555 | GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT); |
556 | |
557 | entry->type = mm->ppgtt_mm.root_entry_type; |
558 | pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps : |
559 | mm->ppgtt_mm.shadow_pdps, |
560 | entry, index, false, 0, mm->vgpu); |
	update_entry_type_for_real(pte_ops, entry, false);
562 | } |
563 | |
564 | static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm, |
565 | struct intel_gvt_gtt_entry *entry, unsigned long index) |
566 | { |
	_ppgtt_get_root_entry(mm, entry, index, true);
568 | } |
569 | |
570 | static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm, |
571 | struct intel_gvt_gtt_entry *entry, unsigned long index) |
572 | { |
	_ppgtt_get_root_entry(mm, entry, index, false);
574 | } |
575 | |
576 | static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm, |
577 | struct intel_gvt_gtt_entry *entry, unsigned long index, |
578 | bool guest) |
579 | { |
580 | const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; |
581 | |
582 | pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps : |
583 | mm->ppgtt_mm.shadow_pdps, |
584 | entry, index, false, 0, mm->vgpu); |
585 | } |
586 | |
587 | static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm, |
588 | struct intel_gvt_gtt_entry *entry, unsigned long index) |
589 | { |
	_ppgtt_set_root_entry(mm, entry, index, false);
591 | } |
592 | |
593 | static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm, |
594 | struct intel_gvt_gtt_entry *entry, unsigned long index) |
595 | { |
596 | const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; |
597 | |
598 | GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); |
599 | |
600 | entry->type = GTT_TYPE_GGTT_PTE; |
601 | pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index, |
602 | false, 0, mm->vgpu); |
603 | } |
604 | |
605 | static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm, |
606 | struct intel_gvt_gtt_entry *entry, unsigned long index) |
607 | { |
608 | const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; |
609 | |
610 | GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); |
611 | |
612 | pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index, |
613 | false, 0, mm->vgpu); |
614 | } |
615 | |
616 | static void ggtt_get_host_entry(struct intel_vgpu_mm *mm, |
617 | struct intel_gvt_gtt_entry *entry, unsigned long index) |
618 | { |
619 | const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; |
620 | |
621 | GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); |
622 | |
623 | pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu); |
624 | } |
625 | |
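/*
 * Besides writing the entry into the host GGTT, keep a per-vGPU copy
 * of the aperture/hidden ranges so that the host GGTT contents can be
 * restored later (e.g. across suspend/resume).
 */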
626 | static void ggtt_set_host_entry(struct intel_vgpu_mm *mm, |
627 | struct intel_gvt_gtt_entry *entry, unsigned long index) |
628 | { |
629 | const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; |
630 | unsigned long offset = index; |
631 | |
632 | GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); |
633 | |
634 | if (vgpu_gmadr_is_aperture(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) { |
635 | offset -= (vgpu_aperture_gmadr_base(mm->vgpu) >> PAGE_SHIFT); |
636 | mm->ggtt_mm.host_ggtt_aperture[offset] = entry->val64; |
637 | } else if (vgpu_gmadr_is_hidden(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) { |
638 | offset -= (vgpu_hidden_gmadr_base(mm->vgpu) >> PAGE_SHIFT); |
639 | mm->ggtt_mm.host_ggtt_hidden[offset] = entry->val64; |
640 | } |
641 | |
642 | pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu); |
643 | } |
644 | |
645 | /* |
646 | * PPGTT shadow page table helpers. |
647 | */ |
648 | static inline int ppgtt_spt_get_entry( |
649 | struct intel_vgpu_ppgtt_spt *spt, |
650 | void *page_table, int type, |
651 | struct intel_gvt_gtt_entry *e, unsigned long index, |
652 | bool guest) |
653 | { |
654 | struct intel_gvt *gvt = spt->vgpu->gvt; |
655 | const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; |
656 | int ret; |
657 | |
658 | e->type = get_entry_type(type); |
659 | |
660 | if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n" )) |
661 | return -EINVAL; |
662 | |
663 | ret = ops->get_entry(page_table, e, index, guest, |
664 | spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, |
665 | spt->vgpu); |
666 | if (ret) |
667 | return ret; |
668 | |
	update_entry_type_for_real(ops, e, guest ?
			spt->guest_page.pde_ips : false);

	gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
		    type, e->type, index, e->val64);
674 | return 0; |
675 | } |
676 | |
677 | static inline int ppgtt_spt_set_entry( |
678 | struct intel_vgpu_ppgtt_spt *spt, |
679 | void *page_table, int type, |
680 | struct intel_gvt_gtt_entry *e, unsigned long index, |
681 | bool guest) |
682 | { |
683 | struct intel_gvt *gvt = spt->vgpu->gvt; |
684 | const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; |
685 | |
686 | if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n" )) |
687 | return -EINVAL; |
688 | |
689 | gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n" , |
690 | type, e->type, index, e->val64); |
691 | |
692 | return ops->set_entry(page_table, e, index, guest, |
693 | spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, |
694 | spt->vgpu); |
695 | } |
696 | |
697 | #define ppgtt_get_guest_entry(spt, e, index) \ |
698 | ppgtt_spt_get_entry(spt, NULL, \ |
699 | spt->guest_page.type, e, index, true) |
700 | |
701 | #define ppgtt_set_guest_entry(spt, e, index) \ |
702 | ppgtt_spt_set_entry(spt, NULL, \ |
703 | spt->guest_page.type, e, index, true) |
704 | |
705 | #define ppgtt_get_shadow_entry(spt, e, index) \ |
706 | ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \ |
707 | spt->shadow_page.type, e, index, false) |
708 | |
709 | #define ppgtt_set_shadow_entry(spt, e, index) \ |
710 | ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \ |
711 | spt->shadow_page.type, e, index, false) |
712 | |
713 | static void *alloc_spt(gfp_t gfp_mask) |
714 | { |
715 | struct intel_vgpu_ppgtt_spt *spt; |
716 | |
	spt = kzalloc(sizeof(*spt), gfp_mask);
718 | if (!spt) |
719 | return NULL; |
720 | |
721 | spt->shadow_page.page = alloc_page(gfp_mask); |
722 | if (!spt->shadow_page.page) { |
		kfree(spt);
724 | return NULL; |
725 | } |
726 | return spt; |
727 | } |
728 | |
729 | static void free_spt(struct intel_vgpu_ppgtt_spt *spt) |
730 | { |
731 | __free_page(spt->shadow_page.page); |
	kfree(spt);
733 | } |
734 | |
735 | static int detach_oos_page(struct intel_vgpu *vgpu, |
736 | struct intel_vgpu_oos_page *oos_page); |
737 | |
738 | static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt) |
739 | { |
740 | struct device *kdev = spt->vgpu->gvt->gt->i915->drm.dev; |
741 | |
	trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type);
743 | |
744 | dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096, |
745 | DMA_BIDIRECTIONAL); |
746 | |
747 | radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn); |
748 | |
749 | if (spt->guest_page.gfn) { |
750 | if (spt->guest_page.oos_page) |
751 | detach_oos_page(vgpu: spt->vgpu, oos_page: spt->guest_page.oos_page); |
752 | |
753 | intel_vgpu_unregister_page_track(vgpu: spt->vgpu, gfn: spt->guest_page.gfn); |
754 | } |
755 | |
756 | list_del_init(entry: &spt->post_shadow_list); |
757 | free_spt(spt); |
758 | } |
759 | |
760 | static void ppgtt_free_all_spt(struct intel_vgpu *vgpu) |
761 | { |
762 | struct intel_vgpu_ppgtt_spt *spt, *spn; |
763 | struct radix_tree_iter iter; |
764 | LIST_HEAD(all_spt); |
765 | void __rcu **slot; |
766 | |
767 | rcu_read_lock(); |
768 | radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) { |
769 | spt = radix_tree_deref_slot(slot); |
		list_move(&spt->post_shadow_list, &all_spt);
771 | } |
772 | rcu_read_unlock(); |
773 | |
774 | list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list) |
775 | ppgtt_free_spt(spt); |
776 | } |
777 | |
778 | static int ppgtt_handle_guest_write_page_table_bytes( |
779 | struct intel_vgpu_ppgtt_spt *spt, |
780 | u64 pa, void *p_data, int bytes); |
781 | |
782 | static int ppgtt_write_protection_handler( |
783 | struct intel_vgpu_page_track *page_track, |
784 | u64 gpa, void *data, int bytes) |
785 | { |
786 | struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data; |
787 | |
788 | int ret; |
789 | |
790 | if (bytes != 4 && bytes != 8) |
791 | return -EINVAL; |
792 | |
793 | ret = ppgtt_handle_guest_write_page_table_bytes(spt, pa: gpa, p_data: data, bytes); |
794 | if (ret) |
795 | return ret; |
796 | return ret; |
797 | } |
798 | |
799 | /* Find a spt by guest gfn. */ |
800 | static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn( |
801 | struct intel_vgpu *vgpu, unsigned long gfn) |
802 | { |
803 | struct intel_vgpu_page_track *track; |
804 | |
805 | track = intel_vgpu_find_page_track(vgpu, gfn); |
806 | if (track && track->handler == ppgtt_write_protection_handler) |
807 | return track->priv_data; |
808 | |
809 | return NULL; |
810 | } |
811 | |
812 | /* Find the spt by shadow page mfn. */ |
813 | static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn( |
814 | struct intel_vgpu *vgpu, unsigned long mfn) |
815 | { |
816 | return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn); |
817 | } |
818 | |
819 | static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt); |
820 | |
821 | /* Allocate shadow page table without guest page. */ |
822 | static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt( |
823 | struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type) |
824 | { |
825 | struct device *kdev = vgpu->gvt->gt->i915->drm.dev; |
826 | struct intel_vgpu_ppgtt_spt *spt = NULL; |
827 | dma_addr_t daddr; |
828 | int ret; |
829 | |
830 | retry: |
831 | spt = alloc_spt(GFP_KERNEL | __GFP_ZERO); |
832 | if (!spt) { |
		if (reclaim_one_ppgtt_mm(vgpu->gvt))
			goto retry;

		gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
		return ERR_PTR(-ENOMEM);
838 | } |
839 | |
840 | spt->vgpu = vgpu; |
	atomic_set(&spt->refcount, 1);
	INIT_LIST_HEAD(&spt->post_shadow_list);
843 | |
844 | /* |
845 | * Init shadow_page. |
846 | */ |
847 | spt->shadow_page.type = type; |
848 | daddr = dma_map_page(kdev, spt->shadow_page.page, |
849 | 0, 4096, DMA_BIDIRECTIONAL); |
	if (dma_mapping_error(kdev, daddr)) {
		gvt_vgpu_err("fail to map dma addr\n");
		ret = -EINVAL;
853 | goto err_free_spt; |
854 | } |
855 | spt->shadow_page.vaddr = page_address(spt->shadow_page.page); |
856 | spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT; |
857 | |
	ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
859 | if (ret) |
860 | goto err_unmap_dma; |
861 | |
862 | return spt; |
863 | |
864 | err_unmap_dma: |
865 | dma_unmap_page(kdev, daddr, PAGE_SIZE, DMA_BIDIRECTIONAL); |
866 | err_free_spt: |
867 | free_spt(spt); |
	return ERR_PTR(ret);
869 | } |
870 | |
871 | /* Allocate shadow page table associated with specific gfn. */ |
872 | static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn( |
873 | struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type, |
874 | unsigned long gfn, bool guest_pde_ips) |
875 | { |
876 | struct intel_vgpu_ppgtt_spt *spt; |
877 | int ret; |
878 | |
879 | spt = ppgtt_alloc_spt(vgpu, type); |
	if (IS_ERR(spt))
881 | return spt; |
882 | |
883 | /* |
884 | * Init guest_page. |
885 | */ |
	ret = intel_vgpu_register_page_track(vgpu, gfn,
			ppgtt_write_protection_handler, spt);
	if (ret) {
		ppgtt_free_spt(spt);
		return ERR_PTR(ret);
	}
891 | } |
892 | |
893 | spt->guest_page.type = type; |
894 | spt->guest_page.gfn = gfn; |
895 | spt->guest_page.pde_ips = guest_pde_ips; |
896 | |
	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
898 | |
899 | return spt; |
900 | } |
901 | |
902 | #define pt_entry_size_shift(spt) \ |
903 | ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift) |
904 | |
905 | #define pt_entries(spt) \ |
906 | (I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt)) |
907 | |
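/*
 * For a page table holding 64K entries (pde_ips), only every 16th slot
 * is meaningful, so the iterators below step by GTT_64K_PTE_STRIDE in
 * that case.
 */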
908 | #define for_each_present_guest_entry(spt, e, i) \ |
909 | for (i = 0; i < pt_entries(spt); \ |
910 | i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \ |
911 | if (!ppgtt_get_guest_entry(spt, e, i) && \ |
912 | spt->vgpu->gvt->gtt.pte_ops->test_present(e)) |
913 | |
914 | #define for_each_present_shadow_entry(spt, e, i) \ |
915 | for (i = 0; i < pt_entries(spt); \ |
916 | i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \ |
917 | if (!ppgtt_get_shadow_entry(spt, e, i) && \ |
918 | spt->vgpu->gvt->gtt.pte_ops->test_present(e)) |
919 | |
920 | #define for_each_shadow_entry(spt, e, i) \ |
921 | for (i = 0; i < pt_entries(spt); \ |
922 | i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \ |
923 | if (!ppgtt_get_shadow_entry(spt, e, i)) |
924 | |
925 | static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt) |
926 | { |
	int v = atomic_read(&spt->refcount);

	trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
	atomic_inc(&spt->refcount);
931 | } |
932 | |
933 | static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt) |
934 | { |
	int v = atomic_read(&spt->refcount);

	trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
	return atomic_dec_return(&spt->refcount);
939 | } |
940 | |
941 | static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt); |
942 | |
943 | static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu, |
944 | struct intel_gvt_gtt_entry *e) |
945 | { |
946 | struct drm_i915_private *i915 = vgpu->gvt->gt->i915; |
947 | const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; |
948 | struct intel_vgpu_ppgtt_spt *s; |
949 | enum intel_gvt_gtt_type cur_pt_type; |
950 | |
951 | GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type))); |
952 | |
953 | if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY |
954 | && e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { |
		cur_pt_type = get_next_pt_type(e->type);
956 | |
957 | if (!gtt_type_is_pt(cur_pt_type) || |
958 | !gtt_type_is_pt(cur_pt_type + 1)) { |
959 | drm_WARN(&i915->drm, 1, |
960 | "Invalid page table type, cur_pt_type is: %d\n" , |
961 | cur_pt_type); |
962 | return -EINVAL; |
963 | } |
964 | |
965 | cur_pt_type += 1; |
966 | |
967 | if (ops->get_pfn(e) == |
968 | vgpu->gtt.scratch_pt[cur_pt_type].page_mfn) |
969 | return 0; |
970 | } |
	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
	if (!s) {
		gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
				ops->get_pfn(e));
		return -ENXIO;
	}
	return ppgtt_invalidate_spt(s);
978 | } |
979 | |
980 | static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt, |
981 | struct intel_gvt_gtt_entry *entry) |
982 | { |
983 | struct intel_vgpu *vgpu = spt->vgpu; |
984 | const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; |
985 | unsigned long pfn; |
986 | int type; |
987 | |
988 | pfn = ops->get_pfn(entry); |
989 | type = spt->shadow_page.type; |
990 | |
991 | /* Uninitialized spte or unshadowed spte. */ |
992 | if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn) |
993 | return; |
994 | |
	intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
996 | } |
997 | |
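/*
 * Drop one reference on a shadow page table. When the last reference
 * goes away, recursively invalidate any lower-level shadow page tables
 * it points to, unmap its PTEs and free the spt itself.
 */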
998 | static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt) |
999 | { |
1000 | struct intel_vgpu *vgpu = spt->vgpu; |
1001 | struct intel_gvt_gtt_entry e; |
1002 | unsigned long index; |
1003 | int ret; |
1004 | |
1005 | trace_spt_change(id: spt->vgpu->id, action: "die" , spt, |
1006 | gfn: spt->guest_page.gfn, type: spt->shadow_page.type); |
1007 | |
1008 | if (ppgtt_put_spt(spt) > 0) |
1009 | return 0; |
1010 | |
1011 | for_each_present_shadow_entry(spt, &e, index) { |
1012 | switch (e.type) { |
1013 | case GTT_TYPE_PPGTT_PTE_4K_ENTRY: |
1014 | gvt_vdbg_mm("invalidate 4K entry\n" ); |
1015 | ppgtt_invalidate_pte(spt, entry: &e); |
1016 | break; |
1017 | case GTT_TYPE_PPGTT_PTE_64K_ENTRY: |
			/* We don't set up 64K shadow entries so far. */
			WARN(1, "suspicious 64K gtt entry\n");
1020 | continue; |
1021 | case GTT_TYPE_PPGTT_PTE_2M_ENTRY: |
1022 | gvt_vdbg_mm("invalidate 2M entry\n" ); |
1023 | continue; |
1024 | case GTT_TYPE_PPGTT_PTE_1G_ENTRY: |
1025 | WARN(1, "GVT doesn't support 1GB page\n" ); |
1026 | continue; |
1027 | case GTT_TYPE_PPGTT_PML4_ENTRY: |
1028 | case GTT_TYPE_PPGTT_PDP_ENTRY: |
1029 | case GTT_TYPE_PPGTT_PDE_ENTRY: |
1030 | gvt_vdbg_mm("invalidate PMUL4/PDP/PDE entry\n" ); |
1031 | ret = ppgtt_invalidate_spt_by_shadow_entry( |
1032 | vgpu: spt->vgpu, e: &e); |
1033 | if (ret) |
1034 | goto fail; |
1035 | break; |
1036 | default: |
1037 | GEM_BUG_ON(1); |
1038 | } |
1039 | } |
1040 | |
1041 | trace_spt_change(id: spt->vgpu->id, action: "release" , spt, |
1042 | gfn: spt->guest_page.gfn, type: spt->shadow_page.type); |
1043 | ppgtt_free_spt(spt); |
1044 | return 0; |
1045 | fail: |
1046 | gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n" , |
1047 | spt, e.val64, e.type); |
1048 | return ret; |
1049 | } |
1050 | |
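/* Whether this vGPU may use 64K (IPS) pages, decided per platform. */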
1051 | static bool vgpu_ips_enabled(struct intel_vgpu *vgpu) |
1052 | { |
1053 | struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; |
1054 | |
1055 | if (GRAPHICS_VER(dev_priv) == 9) { |
1056 | u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) & |
1057 | GAMW_ECO_ENABLE_64K_IPS_FIELD; |
1058 | |
1059 | return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD; |
1060 | } else if (GRAPHICS_VER(dev_priv) >= 11) { |
		/* 64K paging is now controlled only by the IPS bit in each PTE. */
1062 | return true; |
1063 | } else |
1064 | return false; |
1065 | } |
1066 | |
1067 | static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt); |
1068 | |
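/*
 * Find the shadow page table backing a guest page-table entry, taking
 * a reference on it; allocate, write-protect and populate a new one
 * when none exists yet.
 */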
1069 | static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry( |
1070 | struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we) |
1071 | { |
1072 | const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; |
1073 | struct intel_vgpu_ppgtt_spt *spt = NULL; |
1074 | bool ips = false; |
1075 | int ret; |
1076 | |
1077 | GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type))); |
1078 | |
1079 | if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY) |
1080 | ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we); |
1081 | |
	spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
1083 | if (spt) { |
1084 | ppgtt_get_spt(spt); |
1085 | |
1086 | if (ips != spt->guest_page.pde_ips) { |
1087 | spt->guest_page.pde_ips = ips; |
1088 | |
1089 | gvt_dbg_mm("reshadow PDE since ips changed\n" ); |
1090 | clear_page(page: spt->shadow_page.vaddr); |
1091 | ret = ppgtt_populate_spt(spt); |
1092 | if (ret) { |
1093 | ppgtt_put_spt(spt); |
1094 | goto err; |
1095 | } |
1096 | } |
1097 | } else { |
1098 | int type = get_next_pt_type(type: we->type); |
1099 | |
1100 | if (!gtt_type_is_pt(type)) { |
1101 | ret = -EINVAL; |
1102 | goto err; |
1103 | } |
1104 | |
		spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
		if (IS_ERR(spt)) {
			ret = PTR_ERR(spt);
1108 | goto err; |
1109 | } |
1110 | |
		ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
1112 | if (ret) |
1113 | goto err_free_spt; |
1114 | |
1115 | ret = ppgtt_populate_spt(spt); |
1116 | if (ret) |
1117 | goto err_free_spt; |
1118 | |
1119 | trace_spt_change(id: vgpu->id, action: "new" , spt, gfn: spt->guest_page.gfn, |
1120 | type: spt->shadow_page.type); |
1121 | } |
1122 | return spt; |
1123 | |
1124 | err_free_spt: |
1125 | ppgtt_free_spt(spt); |
1126 | spt = NULL; |
1127 | err: |
1128 | gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n" , |
1129 | spt, we->val64, we->type); |
	return ERR_PTR(ret);
1131 | } |
1132 | |
1133 | static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se, |
1134 | struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge) |
1135 | { |
1136 | const struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops; |
1137 | |
1138 | se->type = ge->type; |
1139 | se->val64 = ge->val64; |
1140 | |
	/* We always split 64K pages, so clear IPS in the shadow PDE. */
1142 | if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY) |
1143 | ops->clear_ips(se); |
1144 | |
1145 | ops->set_pfn(se, s->shadow_page.mfn); |
1146 | } |
1147 | |
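/*
 * Shadow a guest 2M page with 4K pages when the host cannot map it as
 * a single 2M page: allocate a sub spt, map every 4K sub-page and make
 * the shadow PDE point at the sub spt instead.
 */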
1148 | static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, |
1149 | struct intel_vgpu_ppgtt_spt *spt, unsigned long index, |
1150 | struct intel_gvt_gtt_entry *se) |
1151 | { |
1152 | const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; |
1153 | struct intel_vgpu_ppgtt_spt *sub_spt; |
1154 | struct intel_gvt_gtt_entry sub_se; |
1155 | unsigned long start_gfn; |
1156 | dma_addr_t dma_addr; |
1157 | unsigned long sub_index; |
1158 | int ret; |
1159 | |
1160 | gvt_dbg_mm("Split 2M gtt entry, index %lu\n" , index); |
1161 | |
1162 | start_gfn = ops->get_pfn(se); |
1163 | |
	sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT);
	if (IS_ERR(sub_spt))
		return PTR_ERR(sub_spt);
1167 | |
1168 | for_each_shadow_entry(sub_spt, &sub_se, sub_index) { |
		ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + sub_index,
						   PAGE_SIZE, &dma_addr);
1171 | if (ret) |
1172 | goto err; |
1173 | sub_se.val64 = se->val64; |
1174 | |
1175 | /* Copy the PAT field from PDE. */ |
1176 | sub_se.val64 &= ~_PAGE_PAT; |
1177 | sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5; |
1178 | |
1179 | ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT); |
1180 | ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index); |
1181 | } |
1182 | |
1183 | /* Clear dirty field. */ |
1184 | se->val64 &= ~_PAGE_DIRTY; |
1185 | |
1186 | ops->clear_pse(se); |
1187 | ops->clear_ips(se); |
1188 | ops->set_pfn(se, sub_spt->shadow_page.mfn); |
1189 | ppgtt_set_shadow_entry(spt, se, index); |
1190 | return 0; |
1191 | err: |
	/* Cancel the existing address mappings of the DMA addr. */
	for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) {
		gvt_vdbg_mm("invalidate 4K entry\n");
		ppgtt_invalidate_pte(sub_spt, &sub_se);
	}
	/* Release the newly allocated spt. */
	trace_spt_change(sub_spt->vgpu->id, "release", sub_spt,
		sub_spt->guest_page.gfn, sub_spt->shadow_page.type);
	ppgtt_free_spt(sub_spt);
1201 | return ret; |
1202 | } |
1203 | |
1204 | static int split_64KB_gtt_entry(struct intel_vgpu *vgpu, |
1205 | struct intel_vgpu_ppgtt_spt *spt, unsigned long index, |
1206 | struct intel_gvt_gtt_entry *se) |
1207 | { |
1208 | const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; |
1209 | struct intel_gvt_gtt_entry entry = *se; |
1210 | unsigned long start_gfn; |
1211 | dma_addr_t dma_addr; |
1212 | int i, ret; |
1213 | |
1214 | gvt_vdbg_mm("Split 64K gtt entry, index %lu\n" , index); |
1215 | |
1216 | GEM_BUG_ON(index % GTT_64K_PTE_STRIDE); |
1217 | |
1218 | start_gfn = ops->get_pfn(se); |
1219 | |
1220 | entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY; |
1221 | ops->set_64k_splited(&entry); |
1222 | |
1223 | for (i = 0; i < GTT_64K_PTE_STRIDE; i++) { |
		ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + i,
						   PAGE_SIZE, &dma_addr);
1226 | if (ret) |
1227 | return ret; |
1228 | |
1229 | ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT); |
1230 | ppgtt_set_shadow_entry(spt, &entry, index + i); |
1231 | } |
1232 | return 0; |
1233 | } |
1234 | |
1235 | static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, |
1236 | struct intel_vgpu_ppgtt_spt *spt, unsigned long index, |
1237 | struct intel_gvt_gtt_entry *ge) |
1238 | { |
1239 | const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; |
1240 | struct intel_gvt_gtt_entry se = *ge; |
1241 | unsigned long gfn; |
1242 | dma_addr_t dma_addr; |
1243 | int ret; |
1244 | |
1245 | if (!pte_ops->test_present(ge)) |
1246 | return 0; |
1247 | |
1248 | gfn = pte_ops->get_pfn(ge); |
1249 | |
1250 | switch (ge->type) { |
1251 | case GTT_TYPE_PPGTT_PTE_4K_ENTRY: |
1252 | gvt_vdbg_mm("shadow 4K gtt entry\n" ); |
1253 | ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, dma_addr: &dma_addr); |
1254 | if (ret) |
1255 | return -ENXIO; |
1256 | break; |
1257 | case GTT_TYPE_PPGTT_PTE_64K_ENTRY: |
1258 | gvt_vdbg_mm("shadow 64K gtt entry\n" ); |
1259 | /* |
1260 | * The layout of 64K page is special, the page size is |
1261 | * controlled by uper PDE. To be simple, we always split |
1262 | * 64K page to smaller 4K pages in shadow PT. |
1263 | */ |
1264 | return split_64KB_gtt_entry(vgpu, spt, index, se: &se); |
1265 | case GTT_TYPE_PPGTT_PTE_2M_ENTRY: |
1266 | gvt_vdbg_mm("shadow 2M gtt entry\n" ); |
1267 | if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M) || |
1268 | intel_gvt_dma_map_guest_page(vgpu, gfn, |
1269 | I915_GTT_PAGE_SIZE_2M, dma_addr: &dma_addr)) |
1270 | return split_2MB_gtt_entry(vgpu, spt, index, se: &se); |
1271 | break; |
1272 | case GTT_TYPE_PPGTT_PTE_1G_ENTRY: |
1273 | gvt_vgpu_err("GVT doesn't support 1GB entry\n" ); |
1274 | return -EINVAL; |
1275 | default: |
1276 | GEM_BUG_ON(1); |
1277 | return -EINVAL; |
1278 | } |
1279 | |
1280 | /* Successfully shadowed a 4K or 2M page (without splitting). */ |
1281 | pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT); |
1282 | ppgtt_set_shadow_entry(spt, &se, index); |
1283 | return 0; |
1284 | } |
1285 | |
1286 | static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt) |
1287 | { |
1288 | struct intel_vgpu *vgpu = spt->vgpu; |
1289 | struct intel_vgpu_ppgtt_spt *s; |
1290 | struct intel_gvt_gtt_entry se, ge; |
1291 | unsigned long i; |
1292 | int ret; |
1293 | |
1294 | trace_spt_change(id: spt->vgpu->id, action: "born" , spt, |
1295 | gfn: spt->guest_page.gfn, type: spt->shadow_page.type); |
1296 | |
1297 | for_each_present_guest_entry(spt, &ge, i) { |
1298 | if (gtt_type_is_pt(get_next_pt_type(ge.type))) { |
1299 | s = ppgtt_populate_spt_by_guest_entry(vgpu, we: &ge); |
1300 | if (IS_ERR(ptr: s)) { |
1301 | ret = PTR_ERR(ptr: s); |
1302 | goto fail; |
1303 | } |
1304 | ppgtt_get_shadow_entry(spt, &se, i); |
1305 | ppgtt_generate_shadow_entry(se: &se, s, ge: &ge); |
1306 | ppgtt_set_shadow_entry(spt, &se, i); |
1307 | } else { |
1308 | ret = ppgtt_populate_shadow_entry(vgpu, spt, index: i, ge: &ge); |
1309 | if (ret) |
1310 | goto fail; |
1311 | } |
1312 | } |
1313 | return 0; |
1314 | fail: |
1315 | gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n" , |
1316 | spt, ge.val64, ge.type); |
1317 | return ret; |
1318 | } |
1319 | |
1320 | static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt, |
1321 | struct intel_gvt_gtt_entry *se, unsigned long index) |
1322 | { |
1323 | struct intel_vgpu *vgpu = spt->vgpu; |
1324 | const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; |
1325 | int ret; |
1326 | |
1327 | trace_spt_guest_change(id: spt->vgpu->id, tag: "remove" , spt, |
1328 | type: spt->shadow_page.type, v: se->val64, index); |
1329 | |
1330 | gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n" , |
1331 | se->type, index, se->val64); |
1332 | |
1333 | if (!ops->test_present(se)) |
1334 | return 0; |
1335 | |
1336 | if (ops->get_pfn(se) == |
1337 | vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn) |
1338 | return 0; |
1339 | |
1340 | if (gtt_type_is_pt(get_next_pt_type(se->type))) { |
1341 | struct intel_vgpu_ppgtt_spt *s = |
1342 | intel_vgpu_find_spt_by_mfn(vgpu, mfn: ops->get_pfn(se)); |
1343 | if (!s) { |
1344 | gvt_vgpu_err("fail to find guest page\n" ); |
1345 | ret = -ENXIO; |
1346 | goto fail; |
1347 | } |
		ret = ppgtt_invalidate_spt(s);
1349 | if (ret) |
1350 | goto fail; |
1351 | } else { |
		/* We don't set up 64K shadow entries so far. */
		WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY,
		     "suspicious 64K entry\n");
		ppgtt_invalidate_pte(spt, se);
1356 | } |
1357 | |
1358 | return 0; |
1359 | fail: |
1360 | gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n" , |
1361 | spt, se->val64, se->type); |
1362 | return ret; |
1363 | } |
1364 | |
1365 | static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt, |
1366 | struct intel_gvt_gtt_entry *we, unsigned long index) |
1367 | { |
1368 | struct intel_vgpu *vgpu = spt->vgpu; |
1369 | struct intel_gvt_gtt_entry m; |
1370 | struct intel_vgpu_ppgtt_spt *s; |
1371 | int ret; |
1372 | |
1373 | trace_spt_guest_change(id: spt->vgpu->id, tag: "add" , spt, type: spt->shadow_page.type, |
1374 | v: we->val64, index); |
1375 | |
1376 | gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n" , |
1377 | we->type, index, we->val64); |
1378 | |
1379 | if (gtt_type_is_pt(get_next_pt_type(we->type))) { |
1380 | s = ppgtt_populate_spt_by_guest_entry(vgpu, we); |
		if (IS_ERR(s)) {
			ret = PTR_ERR(s);
1383 | goto fail; |
1384 | } |
1385 | ppgtt_get_shadow_entry(spt, &m, index); |
1386 | ppgtt_generate_shadow_entry(se: &m, s, ge: we); |
1387 | ppgtt_set_shadow_entry(spt, &m, index); |
1388 | } else { |
		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we);
1390 | if (ret) |
1391 | goto fail; |
1392 | } |
1393 | return 0; |
1394 | fail: |
1395 | gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n" , |
1396 | spt, we->val64, we->type); |
1397 | return ret; |
1398 | } |
1399 | |
1400 | static int sync_oos_page(struct intel_vgpu *vgpu, |
1401 | struct intel_vgpu_oos_page *oos_page) |
1402 | { |
1403 | const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; |
1404 | struct intel_gvt *gvt = vgpu->gvt; |
1405 | const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; |
1406 | struct intel_vgpu_ppgtt_spt *spt = oos_page->spt; |
1407 | struct intel_gvt_gtt_entry old, new; |
1408 | int index; |
1409 | int ret; |
1410 | |
1411 | trace_oos_change(id: vgpu->id, tag: "sync" , page_id: oos_page->id, |
1412 | gpt: spt, type: spt->guest_page.type); |
1413 | |
	old.type = new.type = get_entry_type(spt->guest_page.type);
1415 | old.val64 = new.val64 = 0; |
1416 | |
1417 | for (index = 0; index < (I915_GTT_PAGE_SIZE >> |
1418 | info->gtt_entry_size_shift); index++) { |
1419 | ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu); |
1420 | ops->get_entry(NULL, &new, index, true, |
1421 | spt->guest_page.gfn << PAGE_SHIFT, vgpu); |
1422 | |
1423 | if (old.val64 == new.val64 |
1424 | && !test_and_clear_bit(nr: index, addr: spt->post_shadow_bitmap)) |
1425 | continue; |
1426 | |
1427 | trace_oos_sync(id: vgpu->id, page_id: oos_page->id, |
1428 | gpt: spt, type: spt->guest_page.type, |
1429 | v: new.val64, index); |
1430 | |
1431 | ret = ppgtt_populate_shadow_entry(vgpu, spt, index, ge: &new); |
1432 | if (ret) |
1433 | return ret; |
1434 | |
1435 | ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu); |
1436 | } |
1437 | |
1438 | spt->guest_page.write_cnt = 0; |
1439 | list_del_init(entry: &spt->post_shadow_list); |
1440 | return 0; |
1441 | } |
1442 | |
1443 | static int detach_oos_page(struct intel_vgpu *vgpu, |
1444 | struct intel_vgpu_oos_page *oos_page) |
1445 | { |
1446 | struct intel_gvt *gvt = vgpu->gvt; |
1447 | struct intel_vgpu_ppgtt_spt *spt = oos_page->spt; |
1448 | |
1449 | trace_oos_change(id: vgpu->id, tag: "detach" , page_id: oos_page->id, |
1450 | gpt: spt, type: spt->guest_page.type); |
1451 | |
1452 | spt->guest_page.write_cnt = 0; |
1453 | spt->guest_page.oos_page = NULL; |
1454 | oos_page->spt = NULL; |
1455 | |
1456 | list_del_init(entry: &oos_page->vm_list); |
1457 | list_move_tail(list: &oos_page->list, head: &gvt->gtt.oos_page_free_list_head); |
1458 | |
1459 | return 0; |
1460 | } |
1461 | |
1462 | static int attach_oos_page(struct intel_vgpu_oos_page *oos_page, |
1463 | struct intel_vgpu_ppgtt_spt *spt) |
1464 | { |
1465 | struct intel_gvt *gvt = spt->vgpu->gvt; |
1466 | int ret; |
1467 | |
1468 | ret = intel_gvt_read_gpa(vgpu: spt->vgpu, |
1469 | gpa: spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, |
1470 | buf: oos_page->mem, I915_GTT_PAGE_SIZE); |
1471 | if (ret) |
1472 | return ret; |
1473 | |
1474 | oos_page->spt = spt; |
1475 | spt->guest_page.oos_page = oos_page; |
1476 | |
1477 | list_move_tail(list: &oos_page->list, head: &gvt->gtt.oos_page_use_list_head); |
1478 | |
1479 | trace_oos_change(id: spt->vgpu->id, tag: "attach" , page_id: oos_page->id, |
1480 | gpt: spt, type: spt->guest_page.type); |
1481 | return 0; |
1482 | } |
1483 | |
1484 | static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt) |
1485 | { |
1486 | struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; |
1487 | int ret; |
1488 | |
1489 | ret = intel_vgpu_enable_page_track(vgpu: spt->vgpu, gfn: spt->guest_page.gfn); |
1490 | if (ret) |
1491 | return ret; |
1492 | |
1493 | trace_oos_change(id: spt->vgpu->id, tag: "set page sync" , page_id: oos_page->id, |
1494 | gpt: spt, type: spt->guest_page.type); |
1495 | |
1496 | list_del_init(entry: &oos_page->vm_list); |
1497 | return sync_oos_page(vgpu: spt->vgpu, oos_page); |
1498 | } |
1499 | |
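/*
 * Take an oos page from the free list; when the list is empty, reclaim
 * the oldest one in use by syncing it back and detaching it first.
 */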
1500 | static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt) |
1501 | { |
1502 | struct intel_gvt *gvt = spt->vgpu->gvt; |
1503 | struct intel_gvt_gtt *gtt = &gvt->gtt; |
1504 | struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; |
1505 | int ret; |
1506 | |
1507 | WARN(oos_page, "shadow PPGTT page has already has a oos page\n" ); |
1508 | |
	if (list_empty(&gtt->oos_page_free_list_head)) {
1510 | oos_page = container_of(gtt->oos_page_use_list_head.next, |
1511 | struct intel_vgpu_oos_page, list); |
1512 | ret = ppgtt_set_guest_page_sync(spt: oos_page->spt); |
1513 | if (ret) |
1514 | return ret; |
1515 | ret = detach_oos_page(vgpu: spt->vgpu, oos_page); |
1516 | if (ret) |
1517 | return ret; |
1518 | } else |
1519 | oos_page = container_of(gtt->oos_page_free_list_head.next, |
1520 | struct intel_vgpu_oos_page, list); |
1521 | return attach_oos_page(oos_page, spt); |
1522 | } |
1523 | |
1524 | static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt) |
1525 | { |
1526 | struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; |
1527 | |
1528 | if (WARN(!oos_page, "shadow PPGTT page should have a oos page\n" )) |
1529 | return -EINVAL; |
1530 | |
1531 | trace_oos_change(id: spt->vgpu->id, tag: "set page out of sync" , page_id: oos_page->id, |
1532 | gpt: spt, type: spt->guest_page.type); |
1533 | |
1534 | list_add_tail(new: &oos_page->vm_list, head: &spt->vgpu->gtt.oos_page_list_head); |
1535 | return intel_vgpu_disable_page_track(vgpu: spt->vgpu, gfn: spt->guest_page.gfn); |
1536 | } |
1537 | |
1538 | /** |
 * intel_vgpu_sync_oos_pages - sync all the out-of-sync shadow pages for a vGPU
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to the host,
 * to sync all the out-of-sync shadow pages of the vGPU.
1544 | * |
1545 | * Returns: |
1546 | * Zero on success, negative error code if failed. |
1547 | */ |
1548 | int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu) |
1549 | { |
1550 | struct list_head *pos, *n; |
1551 | struct intel_vgpu_oos_page *oos_page; |
1552 | int ret; |
1553 | |
1554 | if (!enable_out_of_sync) |
1555 | return 0; |
1556 | |
1557 | list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) { |
1558 | oos_page = container_of(pos, |
1559 | struct intel_vgpu_oos_page, vm_list); |
1560 | ret = ppgtt_set_guest_page_sync(spt: oos_page->spt); |
1561 | if (ret) |
1562 | return ret; |
1563 | } |
1564 | return 0; |
1565 | } |
1566 | |
1567 | /* |
 * The heart of the PPGTT shadow page table code.
1569 | */ |
1570 | static int ppgtt_handle_guest_write_page_table( |
1571 | struct intel_vgpu_ppgtt_spt *spt, |
1572 | struct intel_gvt_gtt_entry *we, unsigned long index) |
1573 | { |
1574 | struct intel_vgpu *vgpu = spt->vgpu; |
1575 | int type = spt->shadow_page.type; |
1576 | const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; |
1577 | struct intel_gvt_gtt_entry old_se; |
1578 | int new_present; |
1579 | int i, ret; |
1580 | |
1581 | new_present = ops->test_present(we); |
1582 | |
1583 | /* |
1584 | * Adding the new entry first and then removing the old one, that can |
1585 | * guarantee the ppgtt table is validated during the window between |
1586 | * adding and removal. |
1587 | */ |
1588 | ppgtt_get_shadow_entry(spt, &old_se, index); |
1589 | |
1590 | if (new_present) { |
1591 | ret = ppgtt_handle_guest_entry_add(spt, we, index); |
1592 | if (ret) |
1593 | goto fail; |
1594 | } |
1595 | |
1596 | ret = ppgtt_handle_guest_entry_removal(spt, se: &old_se, index); |
1597 | if (ret) |
1598 | goto fail; |
1599 | |
1600 | if (!new_present) { |
		/* For split 64K entries, we need to clear them all. */
1602 | if (ops->test_64k_splited(&old_se) && |
1603 | !(index % GTT_64K_PTE_STRIDE)) { |
1604 | gvt_vdbg_mm("remove splited 64K shadow entries\n" ); |
1605 | for (i = 0; i < GTT_64K_PTE_STRIDE; i++) { |
1606 | ops->clear_64k_splited(&old_se); |
1607 | ops->set_pfn(&old_se, |
1608 | vgpu->gtt.scratch_pt[type].page_mfn); |
1609 | ppgtt_set_shadow_entry(spt, &old_se, index + i); |
1610 | } |
1611 | } else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY || |
1612 | old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) { |
1613 | ops->clear_pse(&old_se); |
1614 | ops->set_pfn(&old_se, |
1615 | vgpu->gtt.scratch_pt[type].page_mfn); |
1616 | ppgtt_set_shadow_entry(spt, &old_se, index); |
1617 | } else { |
1618 | ops->set_pfn(&old_se, |
1619 | vgpu->gtt.scratch_pt[type].page_mfn); |
1620 | ppgtt_set_shadow_entry(spt, &old_se, index); |
1621 | } |
1622 | } |
1623 | |
1624 | return 0; |
1625 | fail: |
1626 | gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n" , |
1627 | spt, we->val64, we->type); |
1628 | return ret; |
1629 | } |
1630 | |
1631 | |
1632 | |
1633 | static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt) |
1634 | { |
1635 | return enable_out_of_sync |
1636 | && gtt_type_is_pte_pt(spt->guest_page.type) |
1637 | && spt->guest_page.write_cnt >= 2; |
1638 | } |
1639 | |
1640 | static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt, |
1641 | unsigned long index) |
1642 | { |
	set_bit(index, spt->post_shadow_bitmap);
	if (!list_empty(&spt->post_shadow_list))
		return;

	list_add_tail(&spt->post_shadow_list,
		      &spt->vgpu->gtt.post_shadow_list_head);
1649 | } |
1650 | |
1651 | /** |
1652 | * intel_vgpu_flush_post_shadow - flush the post shadow transactions |
1653 | * @vgpu: a vGPU |
1654 | * |
1655 | * This function is called before submitting a guest workload to host, |
1656 | * to flush all the post shadows for a vGPU. |
1657 | * |
1658 | * Returns: |
1659 | * Zero on success, negative error code if failed. |
1660 | */ |
1661 | int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu) |
1662 | { |
1663 | struct list_head *pos, *n; |
1664 | struct intel_vgpu_ppgtt_spt *spt; |
1665 | struct intel_gvt_gtt_entry ge; |
1666 | unsigned long index; |
1667 | int ret; |
1668 | |
1669 | list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) { |
1670 | spt = container_of(pos, struct intel_vgpu_ppgtt_spt, |
1671 | post_shadow_list); |
1672 | |
1673 | for_each_set_bit(index, spt->post_shadow_bitmap, |
1674 | GTT_ENTRY_NUM_IN_ONE_PAGE) { |
1675 | ppgtt_get_guest_entry(spt, &ge, index); |
1676 | |
			ret = ppgtt_handle_guest_write_page_table(spt,
							&ge, index);
			if (ret)
				return ret;
			clear_bit(index, spt->post_shadow_bitmap);
		}
		list_del_init(&spt->post_shadow_list);
1684 | } |
1685 | return 0; |
1686 | } |
1687 | |
1688 | static int ppgtt_handle_guest_write_page_table_bytes( |
1689 | struct intel_vgpu_ppgtt_spt *spt, |
1690 | u64 pa, void *p_data, int bytes) |
1691 | { |
1692 | struct intel_vgpu *vgpu = spt->vgpu; |
1693 | const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; |
1694 | const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; |
1695 | struct intel_gvt_gtt_entry we, se; |
1696 | unsigned long index; |
1697 | int ret; |
1698 | |
1699 | index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift; |
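	/*
	 * Worked example: with an 8-byte GTT entry
	 * (gtt_entry_size_shift == 3), a guest write at page offset 0x458
	 * lands on entry index 0x458 >> 3 == 0x8b.
	 */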
1700 | |
1701 | ppgtt_get_guest_entry(spt, &we, index); |
1702 | |
1703 | /* |
	 * For a page table which has 64K GTT entries, only PTE#0, PTE#16,
	 * PTE#32, ... PTE#496 are used. Writes to the unused PTEs should
	 * be ignored.
	 */
1708 | if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY && |
1709 | (index % GTT_64K_PTE_STRIDE)) { |
1710 | gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n" , |
1711 | index); |
1712 | return 0; |
1713 | } |
1714 | |
1715 | if (bytes == info->gtt_entry_size) { |
		ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
1717 | if (ret) |
1718 | return ret; |
1719 | } else { |
1720 | if (!test_bit(index, spt->post_shadow_bitmap)) { |
1721 | int type = spt->shadow_page.type; |
1722 | |
1723 | ppgtt_get_shadow_entry(spt, &se, index); |
			ret = ppgtt_handle_guest_entry_removal(spt, &se, index);
1725 | if (ret) |
1726 | return ret; |
1727 | ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn); |
1728 | ppgtt_set_shadow_entry(spt, &se, index); |
1729 | } |
1730 | ppgtt_set_post_shadow(spt, index); |
1731 | } |
1732 | |
1733 | if (!enable_out_of_sync) |
1734 | return 0; |
1735 | |
1736 | spt->guest_page.write_cnt++; |
1737 | |
1738 | if (spt->guest_page.oos_page) |
1739 | ops->set_entry(spt->guest_page.oos_page->mem, &we, index, |
1740 | false, 0, vgpu); |
1741 | |
1742 | if (can_do_out_of_sync(spt)) { |
1743 | if (!spt->guest_page.oos_page) |
1744 | ppgtt_allocate_oos_page(spt); |
1745 | |
1746 | ret = ppgtt_set_guest_page_oos(spt); |
1747 | if (ret < 0) |
1748 | return ret; |
1749 | } |
1750 | return 0; |
1751 | } |
1752 | |
1753 | static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm) |
1754 | { |
1755 | struct intel_vgpu *vgpu = mm->vgpu; |
1756 | struct intel_gvt *gvt = vgpu->gvt; |
1757 | struct intel_gvt_gtt *gtt = &gvt->gtt; |
1758 | const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops; |
1759 | struct intel_gvt_gtt_entry se; |
1760 | int index; |
1761 | |
1762 | if (!mm->ppgtt_mm.shadowed) |
1763 | return; |
1764 | |
1765 | for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) { |
		ppgtt_get_shadow_root_entry(mm, &se, index);
1767 | |
1768 | if (!ops->test_present(&se)) |
1769 | continue; |
1770 | |
		ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
		se.val64 = 0;
		ppgtt_set_shadow_root_entry(mm, &se, index);

		trace_spt_guest_change(vgpu->id, "destroy root pointer",
				       NULL, se.type, se.val64, index);
1777 | } |
1778 | |
1779 | mm->ppgtt_mm.shadowed = false; |
1780 | } |
1781 | |
1782 | |
1783 | static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm) |
1784 | { |
1785 | struct intel_vgpu *vgpu = mm->vgpu; |
1786 | struct intel_gvt *gvt = vgpu->gvt; |
1787 | struct intel_gvt_gtt *gtt = &gvt->gtt; |
1788 | const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops; |
1789 | struct intel_vgpu_ppgtt_spt *spt; |
1790 | struct intel_gvt_gtt_entry ge, se; |
1791 | int index, ret; |
1792 | |
1793 | if (mm->ppgtt_mm.shadowed) |
1794 | return 0; |
1795 | |
1796 | if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) |
1797 | return -EINVAL; |
1798 | |
1799 | mm->ppgtt_mm.shadowed = true; |
1800 | |
1801 | for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) { |
		ppgtt_get_guest_root_entry(mm, &ge, index);
1803 | |
1804 | if (!ops->test_present(&ge)) |
1805 | continue; |
1806 | |
		trace_spt_guest_change(vgpu->id, __func__, NULL,
				       ge.type, ge.val64, index);

		spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
		if (IS_ERR(spt)) {
			gvt_vgpu_err("fail to populate guest root pointer\n");
			ret = PTR_ERR(spt);
			goto fail;
		}
		ppgtt_generate_shadow_entry(&se, spt, &ge);
		ppgtt_set_shadow_root_entry(mm, &se, index);

		trace_spt_guest_change(vgpu->id, "populate root pointer",
				       NULL, se.type, se.val64, index);
1821 | } |
1822 | |
1823 | return 0; |
1824 | fail: |
1825 | invalidate_ppgtt_mm(mm); |
1826 | return ret; |
1827 | } |
1828 | |
1829 | static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu) |
1830 | { |
1831 | struct intel_vgpu_mm *mm; |
1832 | |
	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
1834 | if (!mm) |
1835 | return NULL; |
1836 | |
1837 | mm->vgpu = vgpu; |
	kref_init(&mm->ref);
	atomic_set(&mm->pincount, 0);
1840 | |
1841 | return mm; |
1842 | } |
1843 | |
1844 | static void vgpu_free_mm(struct intel_vgpu_mm *mm) |
1845 | { |
	kfree(mm);
1847 | } |
1848 | |
1849 | /** |
1850 | * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU |
1851 | * @vgpu: a vGPU |
1852 | * @root_entry_type: ppgtt root entry type |
1853 | * @pdps: guest pdps. |
1854 | * |
1855 | * This function is used to create a ppgtt mm object for a vGPU. |
1856 | * |
1857 | * Returns: |
 * pointer to the mm object on success, ERR_PTR() with a negative error
 * code if failed.
1859 | */ |
1860 | struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, |
1861 | enum intel_gvt_gtt_type root_entry_type, u64 pdps[]) |
1862 | { |
1863 | struct intel_gvt *gvt = vgpu->gvt; |
1864 | struct intel_vgpu_mm *mm; |
1865 | int ret; |
1866 | |
1867 | mm = vgpu_alloc_mm(vgpu); |
1868 | if (!mm) |
		return ERR_PTR(-ENOMEM);
1870 | |
1871 | mm->type = INTEL_GVT_MM_PPGTT; |
1872 | |
1873 | GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY && |
1874 | root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY); |
1875 | mm->ppgtt_mm.root_entry_type = root_entry_type; |
1876 | |
	INIT_LIST_HEAD(&mm->ppgtt_mm.list);
	INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
	INIT_LIST_HEAD(&mm->ppgtt_mm.link);
1880 | |
1881 | if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) |
1882 | mm->ppgtt_mm.guest_pdps[0] = pdps[0]; |
1883 | else |
1884 | memcpy(mm->ppgtt_mm.guest_pdps, pdps, |
1885 | sizeof(mm->ppgtt_mm.guest_pdps)); |
1886 | |
1887 | ret = shadow_ppgtt_mm(mm); |
1888 | if (ret) { |
1889 | gvt_vgpu_err("failed to shadow ppgtt mm\n" ); |
1890 | vgpu_free_mm(mm); |
1891 | return ERR_PTR(error: ret); |
1892 | } |
1893 | |
	list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);

	mutex_lock(&gvt->gtt.ppgtt_mm_lock);
	list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
1899 | |
1900 | return mm; |
1901 | } |
1902 | |
1903 | static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu) |
1904 | { |
1905 | struct intel_vgpu_mm *mm; |
1906 | unsigned long nr_entries; |
1907 | |
1908 | mm = vgpu_alloc_mm(vgpu); |
1909 | if (!mm) |
		return ERR_PTR(-ENOMEM);
1911 | |
1912 | mm->type = INTEL_GVT_MM_GGTT; |
1913 | |
1914 | nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT; |
1915 | mm->ggtt_mm.virtual_ggtt = |
1916 | vzalloc(array_size(nr_entries, |
1917 | vgpu->gvt->device_info.gtt_entry_size)); |
1918 | if (!mm->ggtt_mm.virtual_ggtt) { |
1919 | vgpu_free_mm(mm); |
		return ERR_PTR(-ENOMEM);
1921 | } |
1922 | |
	mm->ggtt_mm.host_ggtt_aperture = vzalloc((vgpu_aperture_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
	if (!mm->ggtt_mm.host_ggtt_aperture) {
		vfree(mm->ggtt_mm.virtual_ggtt);
		vgpu_free_mm(mm);
		return ERR_PTR(-ENOMEM);
	}
1928 | } |
1929 | |
	mm->ggtt_mm.host_ggtt_hidden = vzalloc((vgpu_hidden_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
	if (!mm->ggtt_mm.host_ggtt_hidden) {
		vfree(mm->ggtt_mm.host_ggtt_aperture);
		vfree(mm->ggtt_mm.virtual_ggtt);
		vgpu_free_mm(mm);
		return ERR_PTR(-ENOMEM);
	}
1936 | } |
1937 | |
1938 | return mm; |
1939 | } |
1940 | |
1941 | /** |
1942 | * _intel_vgpu_mm_release - destroy a mm object |
1943 | * @mm_ref: a kref object |
1944 | * |
1945 | * This function is used to destroy a mm object for vGPU |
1946 | * |
1947 | */ |
1948 | void _intel_vgpu_mm_release(struct kref *mm_ref) |
1949 | { |
1950 | struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref); |
1951 | |
1952 | if (GEM_WARN_ON(atomic_read(&mm->pincount))) |
1953 | gvt_err("vgpu mm pin count bug detected\n" ); |
1954 | |
1955 | if (mm->type == INTEL_GVT_MM_PPGTT) { |
		list_del(&mm->ppgtt_mm.list);

		mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
		list_del(&mm->ppgtt_mm.lru_list);
		mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1961 | |
1962 | invalidate_ppgtt_mm(mm); |
1963 | } else { |
		vfree(mm->ggtt_mm.virtual_ggtt);
		vfree(mm->ggtt_mm.host_ggtt_aperture);
		vfree(mm->ggtt_mm.host_ggtt_hidden);
1967 | } |
1968 | |
1969 | vgpu_free_mm(mm); |
1970 | } |
1971 | |
1972 | /** |
1973 | * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object |
1974 | * @mm: a vGPU mm object |
1975 | * |
 * This function is called when a user doesn't want to use a vGPU mm object
1977 | */ |
1978 | void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm) |
1979 | { |
	atomic_dec_if_positive(&mm->pincount);
1981 | } |
1982 | |
1983 | /** |
1984 | * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object |
1985 | * @mm: target vgpu mm |
1986 | * |
 * This function is called when a user wants to use a vGPU mm object. If this
1988 | * mm object hasn't been shadowed yet, the shadow will be populated at this |
1989 | * time. |
1990 | * |
1991 | * Returns: |
1992 | * Zero on success, negative error code if failed. |
1993 | */ |
1994 | int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm) |
1995 | { |
1996 | int ret; |
1997 | |
	atomic_inc(&mm->pincount);
1999 | |
2000 | if (mm->type == INTEL_GVT_MM_PPGTT) { |
2001 | ret = shadow_ppgtt_mm(mm); |
2002 | if (ret) |
2003 | return ret; |
2004 | |
2005 | mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); |
		list_move_tail(&mm->ppgtt_mm.lru_list,
			       &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
		mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
2009 | } |
2010 | |
2011 | return 0; |
2012 | } |
2013 | |
2014 | static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt) |
2015 | { |
2016 | struct intel_vgpu_mm *mm; |
2017 | struct list_head *pos, *n; |
2018 | |
2019 | mutex_lock(&gvt->gtt.ppgtt_mm_lock); |
2020 | |
2021 | list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) { |
2022 | mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list); |
2023 | |
		if (atomic_read(&mm->pincount))
			continue;

		list_del_init(&mm->ppgtt_mm.lru_list);
		mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
2029 | invalidate_ppgtt_mm(mm); |
2030 | return 1; |
2031 | } |
	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
2033 | return 0; |
2034 | } |
2035 | |
2036 | /* |
2037 | * GMA translation APIs. |
2038 | */ |
2039 | static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm, |
2040 | struct intel_gvt_gtt_entry *e, unsigned long index, bool guest) |
2041 | { |
2042 | struct intel_vgpu *vgpu = mm->vgpu; |
2043 | const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; |
2044 | struct intel_vgpu_ppgtt_spt *s; |
2045 | |
	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
2047 | if (!s) |
2048 | return -ENXIO; |
2049 | |
2050 | if (!guest) |
2051 | ppgtt_get_shadow_entry(s, e, index); |
2052 | else |
2053 | ppgtt_get_guest_entry(s, e, index); |
2054 | return 0; |
2055 | } |
2056 | |
2057 | /** |
2058 | * intel_vgpu_gma_to_gpa - translate a gma to GPA |
2059 | * @mm: mm object. could be a PPGTT or GGTT mm object |
2060 | * @gma: graphics memory address in this mm object |
2061 | * |
2062 | * This function is used to translate a graphics memory address in specific |
2063 | * graphics memory space to guest physical address. |
2064 | * |
2065 | * Returns: |
2066 | * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed. |
2067 | */ |
2068 | unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma) |
2069 | { |
2070 | struct intel_vgpu *vgpu = mm->vgpu; |
2071 | struct intel_gvt *gvt = vgpu->gvt; |
2072 | const struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops; |
2073 | const struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops; |
2074 | unsigned long gpa = INTEL_GVT_INVALID_ADDR; |
2075 | unsigned long gma_index[4]; |
2076 | struct intel_gvt_gtt_entry e; |
2077 | int i, levels = 0; |
2078 | int ret; |
2079 | |
2080 | GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT && |
2081 | mm->type != INTEL_GVT_MM_PPGTT); |
2082 | |
2083 | if (mm->type == INTEL_GVT_MM_GGTT) { |
2084 | if (!vgpu_gmadr_is_valid(vgpu, gma)) |
2085 | goto err; |
2086 | |
		ggtt_get_guest_entry(mm, &e,
				     gma_ops->gma_to_ggtt_pte_index(gma));
2089 | |
2090 | gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) |
2091 | + (gma & ~I915_GTT_PAGE_MASK); |
2092 | |
2093 | trace_gma_translate(id: vgpu->id, type: "ggtt" , ring_id: 0, root_entry_type: 0, gma, gpa); |
2094 | } else { |
2095 | switch (mm->ppgtt_mm.root_entry_type) { |
2096 | case GTT_TYPE_PPGTT_ROOT_L4_ENTRY: |
			ppgtt_get_shadow_root_entry(mm, &e, 0);
2098 | |
2099 | gma_index[0] = gma_ops->gma_to_pml4_index(gma); |
2100 | gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma); |
2101 | gma_index[2] = gma_ops->gma_to_pde_index(gma); |
2102 | gma_index[3] = gma_ops->gma_to_pte_index(gma); |
2103 | levels = 4; |
2104 | break; |
2105 | case GTT_TYPE_PPGTT_ROOT_L3_ENTRY: |
			ppgtt_get_shadow_root_entry(mm, &e,
				gma_ops->gma_to_l3_pdp_index(gma));
2108 | |
2109 | gma_index[0] = gma_ops->gma_to_pde_index(gma); |
2110 | gma_index[1] = gma_ops->gma_to_pte_index(gma); |
2111 | levels = 2; |
2112 | break; |
2113 | default: |
2114 | GEM_BUG_ON(1); |
2115 | } |
2116 | |
2117 | /* walk the shadow page table and get gpa from guest entry */ |
2118 | for (i = 0; i < levels; i++) { |
			ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
				(i == levels - 1));
2121 | if (ret) |
2122 | goto err; |
2123 | |
2124 | if (!pte_ops->test_present(&e)) { |
2125 | gvt_dbg_core("GMA 0x%lx is not present\n" , gma); |
2126 | goto err; |
2127 | } |
2128 | } |
2129 | |
2130 | gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) + |
2131 | (gma & ~I915_GTT_PAGE_MASK); |
2132 | trace_gma_translate(id: vgpu->id, type: "ppgtt" , ring_id: 0, |
2133 | root_entry_type: mm->ppgtt_mm.root_entry_type, gma, gpa); |
2134 | } |
2135 | |
2136 | return gpa; |
2137 | err: |
2138 | gvt_vgpu_err("invalid mm type: %d gma %lx\n" , mm->type, gma); |
2139 | return INTEL_GVT_INVALID_ADDR; |
2140 | } |
2141 | |
2142 | static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, |
2143 | unsigned int off, void *p_data, unsigned int bytes) |
2144 | { |
2145 | struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; |
2146 | const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; |
2147 | unsigned long index = off >> info->gtt_entry_size_shift; |
2148 | unsigned long gma; |
2149 | struct intel_gvt_gtt_entry e; |
2150 | |
2151 | if (bytes != 4 && bytes != 8) |
2152 | return -EINVAL; |
2153 | |
2154 | gma = index << I915_GTT_PAGE_SHIFT; |
	if (!intel_gvt_ggtt_validate_range(vgpu,
					   gma, 1 << I915_GTT_PAGE_SHIFT)) {
		gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma);
2158 | memset(p_data, 0, bytes); |
2159 | return 0; |
2160 | } |
2161 | |
	ggtt_get_guest_entry(ggtt_mm, &e, index);
2163 | memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)), |
2164 | bytes); |
2165 | return 0; |
2166 | } |
2167 | |
2168 | /** |
2169 | * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read |
2170 | * @vgpu: a vGPU |
2171 | * @off: register offset |
2172 | * @p_data: data will be returned to guest |
2173 | * @bytes: data length |
2174 | * |
2175 | * This function is used to emulate the GTT MMIO register read |
2176 | * |
2177 | * Returns: |
2178 | * Zero on success, error code if failed. |
2179 | */ |
2180 | int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, |
2181 | void *p_data, unsigned int bytes) |
2182 | { |
2183 | const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; |
2184 | int ret; |
2185 | |
2186 | if (bytes != 4 && bytes != 8) |
2187 | return -EINVAL; |
2188 | |
2189 | off -= info->gtt_start_offset; |
2190 | ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes); |
2191 | return ret; |
2192 | } |
2193 | |
2194 | static void ggtt_invalidate_pte(struct intel_vgpu *vgpu, |
2195 | struct intel_gvt_gtt_entry *entry) |
2196 | { |
2197 | const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; |
2198 | unsigned long pfn; |
2199 | |
2200 | pfn = pte_ops->get_pfn(entry); |
2201 | if (pfn != vgpu->gvt->gtt.scratch_mfn) |
		intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
2203 | } |
2204 | |
2205 | static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, |
2206 | void *p_data, unsigned int bytes) |
2207 | { |
2208 | struct intel_gvt *gvt = vgpu->gvt; |
2209 | const struct intel_gvt_device_info *info = &gvt->device_info; |
2210 | struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; |
2211 | const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; |
2212 | unsigned long g_gtt_index = off >> info->gtt_entry_size_shift; |
2213 | unsigned long gma, gfn; |
2214 | struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE}; |
2215 | struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE}; |
2216 | dma_addr_t dma_addr; |
2217 | int ret; |
2218 | struct intel_gvt_partial_pte *partial_pte, *pos, *n; |
2219 | bool partial_update = false; |
2220 | |
2221 | if (bytes != 4 && bytes != 8) |
2222 | return -EINVAL; |
2223 | |
2224 | gma = g_gtt_index << I915_GTT_PAGE_SHIFT; |
2225 | |
2226 | /* the VM may configure the whole GM space when ballooning is used */ |
2227 | if (!vgpu_gmadr_is_valid(vgpu, gma)) |
2228 | return 0; |
2229 | |
2230 | e.type = GTT_TYPE_GGTT_PTE; |
2231 | memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data, |
2232 | bytes); |
2233 | |
	/* If the ggtt entry size is 8 bytes and the write is split into
	 * two 4-byte writes, save the first 4 bytes in a list and update
	 * the virtual PTE. Only update the shadow PTE when the second
	 * 4 bytes arrive.
	 */
2238 | if (bytes < info->gtt_entry_size) { |
2239 | bool found = false; |
2240 | |
2241 | list_for_each_entry_safe(pos, n, |
2242 | &ggtt_mm->ggtt_mm.partial_pte_list, list) { |
2243 | if (g_gtt_index == pos->offset >> |
2244 | info->gtt_entry_size_shift) { |
2245 | if (off != pos->offset) { |
					/* the second partial part */
2247 | int last_off = pos->offset & |
2248 | (info->gtt_entry_size - 1); |
2249 | |
2250 | memcpy((void *)&e.val64 + last_off, |
2251 | (void *)&pos->data + last_off, |
2252 | bytes); |
2253 | |
				list_del(&pos->list);
				kfree(pos);
2256 | found = true; |
2257 | break; |
2258 | } |
2259 | |
2260 | /* update of the first partial part */ |
2261 | pos->data = e.val64; |
			ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
2263 | return 0; |
2264 | } |
2265 | } |
2266 | |
2267 | if (!found) { |
2268 | /* the first partial part */ |
		partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL);
2270 | if (!partial_pte) |
2271 | return -ENOMEM; |
2272 | partial_pte->offset = off; |
2273 | partial_pte->data = e.val64; |
		list_add_tail(&partial_pte->list,
			      &ggtt_mm->ggtt_mm.partial_pte_list);
2276 | partial_update = true; |
2277 | } |
2278 | } |
2279 | |
2280 | if (!partial_update && (ops->test_present(&e))) { |
2281 | gfn = ops->get_pfn(&e); |
2282 | m.val64 = e.val64; |
2283 | m.type = e.type; |
2284 | |
		ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE,
						   &dma_addr);
		if (ret) {
			gvt_vgpu_err("fail to populate guest ggtt entry\n");
			/* The guest driver may read/write the entry while it
			 * is only partially updated; the p2m translation will
			 * fail in this situation, so point the shadow entry
			 * at a scratch page instead.
			 */
2293 | ops->set_pfn(&m, gvt->gtt.scratch_mfn); |
2294 | } else |
2295 | ops->set_pfn(&m, dma_addr >> PAGE_SHIFT); |
2296 | } else { |
2297 | ops->set_pfn(&m, gvt->gtt.scratch_mfn); |
2298 | ops->clear_present(&m); |
2299 | } |
2300 | |
	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);

	ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
	ggtt_invalidate_pte(vgpu, &e);

	ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
	ggtt_invalidate(gvt->gt);
2308 | return 0; |
2309 | } |
2310 | |
2311 | /* |
2312 | * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write |
2313 | * @vgpu: a vGPU |
2314 | * @off: register offset |
2315 | * @p_data: data from guest write |
2316 | * @bytes: data length |
2317 | * |
2318 | * This function is used to emulate the GTT MMIO register write |
2319 | * |
2320 | * Returns: |
2321 | * Zero on success, error code if failed. |
2322 | */ |
2323 | int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, |
2324 | unsigned int off, void *p_data, unsigned int bytes) |
2325 | { |
2326 | const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; |
2327 | int ret; |
2328 | struct intel_vgpu_submission *s = &vgpu->submission; |
2329 | struct intel_engine_cs *engine; |
2330 | int i; |
2331 | |
2332 | if (bytes != 4 && bytes != 8) |
2333 | return -EINVAL; |
2334 | |
2335 | off -= info->gtt_start_offset; |
2336 | ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes); |
2337 | |
	/* if the ggtt of the last submitted context is written,
	 * that context probably got unpinned.
	 * Set the last shadowed ctx to invalid.
	 */
2342 | for_each_engine(engine, vgpu->gvt->gt, i) { |
2343 | if (!s->last_ctx[i].valid) |
2344 | continue; |
2345 | |
2346 | if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift)) |
2347 | s->last_ctx[i].valid = false; |
2348 | } |
2349 | return ret; |
2350 | } |
2351 | |
2352 | static int alloc_scratch_pages(struct intel_vgpu *vgpu, |
2353 | enum intel_gvt_gtt_type type) |
2354 | { |
2355 | struct drm_i915_private *i915 = vgpu->gvt->gt->i915; |
2356 | struct intel_vgpu_gtt *gtt = &vgpu->gtt; |
2357 | const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; |
2358 | int page_entry_num = I915_GTT_PAGE_SIZE >> |
2359 | vgpu->gvt->device_info.gtt_entry_size_shift; |
2360 | void *scratch_pt; |
2361 | int i; |
2362 | struct device *dev = vgpu->gvt->gt->i915->drm.dev; |
2363 | dma_addr_t daddr; |
2364 | |
2365 | if (drm_WARN_ON(&i915->drm, |
2366 | type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX)) |
2367 | return -EINVAL; |
2368 | |
2369 | scratch_pt = (void *)get_zeroed_page(GFP_KERNEL); |
2370 | if (!scratch_pt) { |
2371 | gvt_vgpu_err("fail to allocate scratch page\n" ); |
2372 | return -ENOMEM; |
2373 | } |
2374 | |
2375 | daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, 4096, DMA_BIDIRECTIONAL); |
2376 | if (dma_mapping_error(dev, dma_addr: daddr)) { |
2377 | gvt_vgpu_err("fail to dmamap scratch_pt\n" ); |
2378 | __free_page(virt_to_page(scratch_pt)); |
2379 | return -ENOMEM; |
2380 | } |
2381 | gtt->scratch_pt[type].page_mfn = |
2382 | (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT); |
2383 | gtt->scratch_pt[type].page = virt_to_page(scratch_pt); |
2384 | gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n" , |
2385 | vgpu->id, type, gtt->scratch_pt[type].page_mfn); |
2386 | |
	/* Build the tree by filling the scratch pt with entries which
	 * point to the next-level scratch pt or the scratch page. The
	 * scratch_pt[type] indicates the scratch pt/scratch page used by
	 * the 'type' pt.
	 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by the
	 * GTT_TYPE_PPGTT_PDE_PT level pt; that means this scratch_pt
	 * itself is of type GTT_TYPE_PPGTT_PTE_PT and is fully filled
	 * with the scratch page mfn.
	 */
2395 | if (type > GTT_TYPE_PPGTT_PTE_PT) { |
2396 | struct intel_gvt_gtt_entry se; |
2397 | |
2398 | memset(&se, 0, sizeof(struct intel_gvt_gtt_entry)); |
		se.type = get_entry_type(type - 1);
2400 | ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn); |
2401 | |
		/* The entry parameters like present/writeable/cache type
		 * are set to the same values as in i915's scratch page tree.
		 */
2405 | se.val64 |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW; |
2406 | if (type == GTT_TYPE_PPGTT_PDE_PT) |
2407 | se.val64 |= PPAT_CACHED; |
2408 | |
2409 | for (i = 0; i < page_entry_num; i++) |
2410 | ops->set_entry(scratch_pt, &se, i, false, 0, vgpu); |
2411 | } |
2412 | |
2413 | return 0; |
2414 | } |
2415 | |
2416 | static int release_scratch_page_tree(struct intel_vgpu *vgpu) |
2417 | { |
2418 | int i; |
2419 | struct device *dev = vgpu->gvt->gt->i915->drm.dev; |
2420 | dma_addr_t daddr; |
2421 | |
2422 | for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { |
2423 | if (vgpu->gtt.scratch_pt[i].page != NULL) { |
2424 | daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn << |
2425 | I915_GTT_PAGE_SHIFT); |
2426 | dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL); |
2427 | __free_page(vgpu->gtt.scratch_pt[i].page); |
2428 | vgpu->gtt.scratch_pt[i].page = NULL; |
2429 | vgpu->gtt.scratch_pt[i].page_mfn = 0; |
2430 | } |
2431 | } |
2432 | |
2433 | return 0; |
2434 | } |
2435 | |
2436 | static int create_scratch_page_tree(struct intel_vgpu *vgpu) |
2437 | { |
2438 | int i, ret; |
2439 | |
2440 | for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { |
		ret = alloc_scratch_pages(vgpu, i);
2442 | if (ret) |
2443 | goto err; |
2444 | } |
2445 | |
2446 | return 0; |
2447 | |
2448 | err: |
2449 | release_scratch_page_tree(vgpu); |
2450 | return ret; |
2451 | } |
2452 | |
2453 | /** |
 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
2455 | * @vgpu: a vGPU |
2456 | * |
2457 | * This function is used to initialize per-vGPU graphics memory virtualization |
2458 | * components. |
2459 | * |
2460 | * Returns: |
2461 | * Zero on success, error code if failed. |
2462 | */ |
2463 | int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) |
2464 | { |
2465 | struct intel_vgpu_gtt *gtt = &vgpu->gtt; |
2466 | |
	INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);

	INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
	INIT_LIST_HEAD(&gtt->oos_page_list_head);
	INIT_LIST_HEAD(&gtt->post_shadow_list_head);
2472 | |
2473 | gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu); |
	if (IS_ERR(gtt->ggtt_mm)) {
		gvt_vgpu_err("fail to create mm for ggtt.\n");
		return PTR_ERR(gtt->ggtt_mm);
2477 | } |
2478 | |
	intel_vgpu_reset_ggtt(vgpu, false);
2480 | |
	INIT_LIST_HEAD(&gtt->ggtt_mm->ggtt_mm.partial_pte_list);
2482 | |
2483 | return create_scratch_page_tree(vgpu); |
2484 | } |
2485 | |
2486 | void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu) |
2487 | { |
2488 | struct list_head *pos, *n; |
2489 | struct intel_vgpu_mm *mm; |
2490 | |
2491 | list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) { |
2492 | mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); |
2493 | intel_vgpu_destroy_mm(mm); |
2494 | } |
2495 | |
2496 | if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head))) |
2497 | gvt_err("vgpu ppgtt mm is not fully destroyed\n" ); |
2498 | |
2499 | if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) { |
2500 | gvt_err("Why we still has spt not freed?\n" ); |
2501 | ppgtt_free_all_spt(vgpu); |
2502 | } |
2503 | } |
2504 | |
2505 | static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu) |
2506 | { |
2507 | struct intel_gvt_partial_pte *pos, *next; |
2508 | |
2509 | list_for_each_entry_safe(pos, next, |
2510 | &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list, |
2511 | list) { |
2512 | gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n" , |
2513 | pos->offset, pos->data); |
2514 | kfree(objp: pos); |
2515 | } |
	intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
2517 | vgpu->gtt.ggtt_mm = NULL; |
2518 | } |
2519 | |
2520 | /** |
 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to clean up per-vGPU graphics memory virtualization
 * components.
 */
2530 | void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu) |
2531 | { |
2532 | intel_vgpu_destroy_all_ppgtt_mm(vgpu); |
2533 | intel_vgpu_destroy_ggtt_mm(vgpu); |
2534 | release_scratch_page_tree(vgpu); |
2535 | } |
2536 | |
2537 | static void clean_spt_oos(struct intel_gvt *gvt) |
2538 | { |
2539 | struct intel_gvt_gtt *gtt = &gvt->gtt; |
2540 | struct list_head *pos, *n; |
2541 | struct intel_vgpu_oos_page *oos_page; |
2542 | |
	WARN(!list_empty(&gtt->oos_page_use_list_head),
	     "someone is still using oos page\n");
2545 | |
	list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
		oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
		list_del(&oos_page->list);
		free_page((unsigned long)oos_page->mem);
		kfree(oos_page);
2551 | } |
2552 | } |
2553 | |
2554 | static int setup_spt_oos(struct intel_gvt *gvt) |
2555 | { |
2556 | struct intel_gvt_gtt *gtt = &gvt->gtt; |
2557 | struct intel_vgpu_oos_page *oos_page; |
2558 | int i; |
2559 | int ret; |
2560 | |
	INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
	INIT_LIST_HEAD(&gtt->oos_page_use_list_head);
2563 | |
2564 | for (i = 0; i < preallocated_oos_pages; i++) { |
		oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
		if (!oos_page) {
			ret = -ENOMEM;
			goto fail;
		}
		oos_page->mem = (void *)__get_free_pages(GFP_KERNEL, 0);
		if (!oos_page->mem) {
			ret = -ENOMEM;
			kfree(oos_page);
			goto fail;
		}

		INIT_LIST_HEAD(&oos_page->list);
		INIT_LIST_HEAD(&oos_page->vm_list);
		oos_page->id = i;
		list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
2581 | } |
2582 | |
2583 | gvt_dbg_mm("%d oos pages preallocated\n" , i); |
2584 | |
2585 | return 0; |
2586 | fail: |
2587 | clean_spt_oos(gvt); |
2588 | return ret; |
2589 | } |
2590 | |
2591 | /** |
2592 | * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object |
2593 | * @vgpu: a vGPU |
2594 | * @pdps: pdp root array |
2595 | * |
2596 | * This function is used to find a PPGTT mm object from mm object pool |
2597 | * |
2598 | * Returns: |
2599 | * pointer to mm object on success, NULL if failed. |
2600 | */ |
2601 | struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, |
2602 | u64 pdps[]) |
2603 | { |
2604 | struct intel_vgpu_mm *mm; |
2605 | struct list_head *pos; |
2606 | |
2607 | list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) { |
2608 | mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); |
2609 | |
2610 | switch (mm->ppgtt_mm.root_entry_type) { |
2611 | case GTT_TYPE_PPGTT_ROOT_L4_ENTRY: |
2612 | if (pdps[0] == mm->ppgtt_mm.guest_pdps[0]) |
2613 | return mm; |
2614 | break; |
2615 | case GTT_TYPE_PPGTT_ROOT_L3_ENTRY: |
			if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
				    sizeof(mm->ppgtt_mm.guest_pdps)))
2618 | return mm; |
2619 | break; |
2620 | default: |
2621 | GEM_BUG_ON(1); |
2622 | } |
2623 | } |
2624 | return NULL; |
2625 | } |
2626 | |
2627 | /** |
2628 | * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object. |
2629 | * @vgpu: a vGPU |
2630 | * @root_entry_type: ppgtt root entry type |
2631 | * @pdps: guest pdps |
2632 | * |
2633 | * This function is used to find or create a PPGTT mm object from a guest. |
2634 | * |
2635 | * Returns: |
 * pointer to the mm object on success, ERR_PTR() if failed.
2637 | */ |
2638 | struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu, |
2639 | enum intel_gvt_gtt_type root_entry_type, u64 pdps[]) |
2640 | { |
2641 | struct intel_vgpu_mm *mm; |
2642 | |
2643 | mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps); |
2644 | if (mm) { |
2645 | intel_vgpu_mm_get(mm); |
2646 | } else { |
2647 | mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps); |
		if (IS_ERR(mm))
			gvt_vgpu_err("fail to create mm\n");
2650 | } |
2651 | return mm; |
2652 | } |
2653 | |
2654 | /** |
2655 | * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object. |
2656 | * @vgpu: a vGPU |
2657 | * @pdps: guest pdps |
2658 | * |
2659 | * This function is used to find a PPGTT mm object from a guest and destroy it. |
2660 | * |
2661 | * Returns: |
2662 | * Zero on success, negative error code if failed. |
2663 | */ |
2664 | int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]) |
2665 | { |
2666 | struct intel_vgpu_mm *mm; |
2667 | |
2668 | mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps); |
2669 | if (!mm) { |
2670 | gvt_vgpu_err("fail to find ppgtt instance.\n" ); |
2671 | return -EINVAL; |
2672 | } |
2673 | intel_vgpu_mm_put(mm); |
2674 | return 0; |
2675 | } |
2676 | |
2677 | /** |
2678 | * intel_gvt_init_gtt - initialize mm components of a GVT device |
2679 | * @gvt: GVT device |
2680 | * |
2681 | * This function is called at the initialization stage, to initialize |
2682 | * the mm components of a GVT device. |
2683 | * |
2684 | * Returns: |
2685 | * zero on success, negative error code if failed. |
2686 | */ |
2687 | int intel_gvt_init_gtt(struct intel_gvt *gvt) |
2688 | { |
2689 | int ret; |
2690 | void *page; |
2691 | struct device *dev = gvt->gt->i915->drm.dev; |
2692 | dma_addr_t daddr; |
2693 | |
2694 | gvt_dbg_core("init gtt\n" ); |
2695 | |
2696 | gvt->gtt.pte_ops = &gen8_gtt_pte_ops; |
2697 | gvt->gtt.gma_ops = &gen8_gtt_gma_ops; |
2698 | |
2699 | page = (void *)get_zeroed_page(GFP_KERNEL); |
2700 | if (!page) { |
2701 | gvt_err("fail to allocate scratch ggtt page\n" ); |
2702 | return -ENOMEM; |
2703 | } |
2704 | |
2705 | daddr = dma_map_page(dev, virt_to_page(page), 0, |
2706 | 4096, DMA_BIDIRECTIONAL); |
	if (dma_mapping_error(dev, daddr)) {
		gvt_err("fail to dmamap scratch ggtt page\n");
2709 | __free_page(virt_to_page(page)); |
2710 | return -ENOMEM; |
2711 | } |
2712 | |
2713 | gvt->gtt.scratch_page = virt_to_page(page); |
2714 | gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT); |
2715 | |
2716 | if (enable_out_of_sync) { |
2717 | ret = setup_spt_oos(gvt); |
2718 | if (ret) { |
2719 | gvt_err("fail to initialize SPT oos\n" ); |
2720 | dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL); |
2721 | __free_page(gvt->gtt.scratch_page); |
2722 | return ret; |
2723 | } |
2724 | } |
	INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
2726 | mutex_init(&gvt->gtt.ppgtt_mm_lock); |
2727 | return 0; |
2728 | } |
2729 | |
2730 | /** |
2731 | * intel_gvt_clean_gtt - clean up mm components of a GVT device |
2732 | * @gvt: GVT device |
2733 | * |
2734 | * This function is called at the driver unloading stage, to clean up |
2735 | * the mm components of a GVT device. |
2736 | * |
2737 | */ |
2738 | void intel_gvt_clean_gtt(struct intel_gvt *gvt) |
2739 | { |
2740 | struct device *dev = gvt->gt->i915->drm.dev; |
2741 | dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn << |
2742 | I915_GTT_PAGE_SHIFT); |
2743 | |
2744 | dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL); |
2745 | |
2746 | __free_page(gvt->gtt.scratch_page); |
2747 | |
2748 | if (enable_out_of_sync) |
2749 | clean_spt_oos(gvt); |
2750 | } |
2751 | |
2752 | /** |
2753 | * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances |
2754 | * @vgpu: a vGPU |
2755 | * |
 * This function is called to invalidate all PPGTT instances of a vGPU.
2757 | * |
2758 | */ |
2759 | void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu) |
2760 | { |
2761 | struct list_head *pos, *n; |
2762 | struct intel_vgpu_mm *mm; |
2763 | |
2764 | list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) { |
2765 | mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); |
2766 | if (mm->type == INTEL_GVT_MM_PPGTT) { |
2767 | mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock); |
			list_del_init(&mm->ppgtt_mm.lru_list);
			mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock);
2770 | if (mm->ppgtt_mm.shadowed) |
2771 | invalidate_ppgtt_mm(mm); |
2772 | } |
2773 | } |
2774 | } |
2775 | |
2776 | /** |
2777 | * intel_vgpu_reset_ggtt - reset the GGTT entry |
2778 | * @vgpu: a vGPU |
2779 | * @invalidate_old: invalidate old entries |
2780 | * |
2781 | * This function is called at the vGPU create stage |
2782 | * to reset all the GGTT entries. |
2783 | * |
2784 | */ |
2785 | void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old) |
2786 | { |
2787 | struct intel_gvt *gvt = vgpu->gvt; |
2788 | const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; |
2789 | struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE}; |
2790 | struct intel_gvt_gtt_entry old_entry; |
2791 | u32 index; |
2792 | u32 num_entries; |
2793 | |
2794 | pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn); |
2795 | pte_ops->set_present(&entry); |
2796 | |
2797 | index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; |
2798 | num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT; |
2799 | while (num_entries--) { |
2800 | if (invalidate_old) { |
			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
			ggtt_invalidate_pte(vgpu, &old_entry);
		}
		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
2805 | } |
2806 | |
2807 | index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; |
2808 | num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; |
2809 | while (num_entries--) { |
2810 | if (invalidate_old) { |
			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
			ggtt_invalidate_pte(vgpu, &old_entry);
		}
		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
2815 | } |
2816 | |
	ggtt_invalidate(gvt->gt);
2818 | } |
2819 | |
2820 | /** |
2821 | * intel_gvt_restore_ggtt - restore all vGPU's ggtt entries |
2822 | * @gvt: intel gvt device |
2823 | * |
2824 | * This function is called at driver resume stage to restore |
2825 | * GGTT entries of every vGPU. |
2826 | * |
2827 | */ |
2828 | void intel_gvt_restore_ggtt(struct intel_gvt *gvt) |
2829 | { |
2830 | struct intel_vgpu *vgpu; |
2831 | struct intel_vgpu_mm *mm; |
2832 | int id; |
2833 | gen8_pte_t pte; |
2834 | u32 idx, num_low, num_hi, offset; |
2835 | |
2836 | /* Restore dirty host ggtt for all vGPUs */ |
2837 | idr_for_each_entry(&(gvt)->vgpu_idr, vgpu, id) { |
2838 | mm = vgpu->gtt.ggtt_mm; |
2839 | |
2840 | num_low = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT; |
2841 | offset = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; |
2842 | for (idx = 0; idx < num_low; idx++) { |
2843 | pte = mm->ggtt_mm.host_ggtt_aperture[idx]; |
2844 | if (pte & GEN8_PAGE_PRESENT) |
				write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
2846 | } |
2847 | |
2848 | num_hi = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; |
2849 | offset = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; |
2850 | for (idx = 0; idx < num_hi; idx++) { |
2851 | pte = mm->ggtt_mm.host_ggtt_hidden[idx]; |
2852 | if (pte & GEN8_PAGE_PRESENT) |
				write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
2854 | } |
2855 | } |
2856 | } |
2857 | |