1 | /* |
2 | * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice (including the next |
12 | * paragraph) shall be included in all copies or substantial portions of the |
13 | * Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
21 | * SOFTWARE. |
22 | * |
23 | * Authors: |
24 | * Zhi Wang <zhi.a.wang@intel.com> |
25 | * |
26 | * Contributors: |
27 | * Ping Gao <ping.a.gao@intel.com> |
28 | * Tina Zhang <tina.zhang@intel.com> |
29 | * Chanbin Du <changbin.du@intel.com> |
30 | * Min He <min.he@intel.com> |
31 | * Bing Niu <bing.niu@intel.com> |
32 | * Zhenyu Wang <zhenyuw@linux.intel.com> |
33 | * |
34 | */ |
35 | |
36 | #include <linux/kthread.h> |
37 | |
38 | #include "gem/i915_gem_pm.h" |
39 | #include "gt/intel_context.h" |
40 | #include "gt/intel_execlists_submission.h" |
41 | #include "gt/intel_gt_regs.h" |
42 | #include "gt/intel_lrc.h" |
43 | #include "gt/intel_ring.h" |
44 | |
45 | #include "i915_drv.h" |
46 | #include "i915_gem_gtt.h" |
47 | #include "i915_perf_oa_regs.h" |
48 | #include "gvt.h" |
49 | |
50 | #define RING_CTX_OFF(x) \ |
51 | offsetof(struct execlist_ring_context, x) |
52 | |
53 | static void set_context_pdp_root_pointer( |
54 | struct execlist_ring_context *ring_context, |
55 | u32 pdp[8]) |
56 | { |
57 | int i; |
58 | |
59 | for (i = 0; i < 8; i++) |
60 | ring_context->pdps[i].val = pdp[7 - i]; |
61 | } |
62 | |
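/*
 * Write the shadow PPGTT root pointers (PDPs) of this workload's shadow_mm
 * into the shadow ring context, so the hardware walks the shadow page
 * tables instead of the guest's own page tables.
 */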
63 | static void update_shadow_pdps(struct intel_vgpu_workload *workload) |
64 | { |
65 | struct execlist_ring_context *shadow_ring_context; |
66 | struct intel_context *ctx = workload->req->context; |
67 | |
68 | if (WARN_ON(!workload->shadow_mm)) |
69 | return; |
70 | |
71 | if (WARN_ON(!atomic_read(&workload->shadow_mm->pincount))) |
72 | return; |
73 | |
74 | shadow_ring_context = (struct execlist_ring_context *)ctx->lrc_reg_state; |
	set_context_pdp_root_pointer(shadow_ring_context,
			(void *)workload->shadow_mm->ppgtt_mm.shadow_pdps);
77 | } |
78 | |
/*
 * When populating the shadow context from the guest, we should not override
 * OA-related registers, so that they will not be clobbered by guest OA
 * configs. This makes it possible to capture OA data from the host for both
 * the host and the guests.
 */
84 | static void sr_oa_regs(struct intel_vgpu_workload *workload, |
85 | u32 *reg_state, bool save) |
86 | { |
87 | struct drm_i915_private *dev_priv = workload->vgpu->gvt->gt->i915; |
88 | u32 ctx_oactxctrl = dev_priv->perf.ctx_oactxctrl_offset; |
89 | u32 ctx_flexeu0 = dev_priv->perf.ctx_flexeu0_offset; |
90 | int i = 0; |
91 | u32 flex_mmio[] = { |
92 | i915_mmio_reg_offset(EU_PERF_CNTL0), |
93 | i915_mmio_reg_offset(EU_PERF_CNTL1), |
94 | i915_mmio_reg_offset(EU_PERF_CNTL2), |
95 | i915_mmio_reg_offset(EU_PERF_CNTL3), |
96 | i915_mmio_reg_offset(EU_PERF_CNTL4), |
97 | i915_mmio_reg_offset(EU_PERF_CNTL5), |
98 | i915_mmio_reg_offset(EU_PERF_CNTL6), |
99 | }; |
100 | |
101 | if (workload->engine->id != RCS0) |
102 | return; |
103 | |
104 | if (save) { |
105 | workload->oactxctrl = reg_state[ctx_oactxctrl + 1]; |
106 | |
107 | for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) { |
108 | u32 state_offset = ctx_flexeu0 + i * 2; |
109 | |
110 | workload->flex_mmio[i] = reg_state[state_offset + 1]; |
111 | } |
112 | } else { |
113 | reg_state[ctx_oactxctrl] = |
114 | i915_mmio_reg_offset(GEN8_OACTXCONTROL); |
115 | reg_state[ctx_oactxctrl + 1] = workload->oactxctrl; |
116 | |
117 | for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) { |
118 | u32 state_offset = ctx_flexeu0 + i * 2; |
119 | u32 mmio = flex_mmio[i]; |
120 | |
121 | reg_state[state_offset] = mmio; |
122 | reg_state[state_offset + 1] = workload->flex_mmio[i]; |
123 | } |
124 | } |
125 | } |
126 | |
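/*
 * Copy the guest ring context into the shadow (host) context image:
 * selected ring-context registers plus the engine context pages read from
 * guest memory, skipping the page copy when the same guest context was
 * already populated by the previous submission.
 */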
127 | static int populate_shadow_context(struct intel_vgpu_workload *workload) |
128 | { |
129 | struct intel_vgpu *vgpu = workload->vgpu; |
130 | struct intel_gvt *gvt = vgpu->gvt; |
131 | struct intel_context *ctx = workload->req->context; |
132 | struct execlist_ring_context *shadow_ring_context; |
133 | void *dst; |
134 | void *context_base; |
135 | unsigned long context_gpa, context_page_num; |
136 | unsigned long gpa_base; /* first gpa of consecutive GPAs */ |
137 | unsigned long gpa_size; /* size of consecutive GPAs */ |
138 | struct intel_vgpu_submission *s = &vgpu->submission; |
139 | int i; |
140 | bool skip = false; |
141 | int ring_id = workload->engine->id; |
142 | int ret; |
143 | |
144 | GEM_BUG_ON(!intel_context_is_pinned(ctx)); |
145 | |
146 | context_base = (void *) ctx->lrc_reg_state - |
147 | (LRC_STATE_PN << I915_GTT_PAGE_SHIFT); |
148 | |
149 | shadow_ring_context = (void *) ctx->lrc_reg_state; |
150 | |
	sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
152 | #define COPY_REG(name) \ |
153 | intel_gvt_read_gpa(vgpu, workload->ring_context_gpa \ |
154 | + RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4) |
155 | #define COPY_REG_MASKED(name) {\ |
156 | intel_gvt_read_gpa(vgpu, workload->ring_context_gpa \ |
157 | + RING_CTX_OFF(name.val),\ |
158 | &shadow_ring_context->name.val, 4);\ |
159 | shadow_ring_context->name.val |= 0xffff << 16;\ |
160 | } |
161 | |
162 | COPY_REG_MASKED(ctx_ctrl); |
163 | COPY_REG(ctx_timestamp); |
164 | |
165 | if (workload->engine->id == RCS0) { |
166 | COPY_REG(bb_per_ctx_ptr); |
167 | COPY_REG(rcs_indirect_ctx); |
168 | COPY_REG(rcs_indirect_ctx_offset); |
169 | } else if (workload->engine->id == BCS0) |
		intel_gvt_read_gpa(vgpu,
			workload->ring_context_gpa +
			BCS_TILE_REGISTER_VAL_OFFSET,
			(void *)shadow_ring_context +
			BCS_TILE_REGISTER_VAL_OFFSET, 4);
175 | #undef COPY_REG |
176 | #undef COPY_REG_MASKED |
177 | |
	/* don't copy Ring Context (the first 0x50 dwords),
	 * only copy the Engine Context part from guest
	 */
	intel_gvt_read_gpa(vgpu,
			workload->ring_context_gpa +
			RING_CTX_SIZE,
			(void *)shadow_ring_context +
			RING_CTX_SIZE,
			I915_GTT_PAGE_SIZE - RING_CTX_SIZE);

	sr_oa_regs(workload, (u32 *)shadow_ring_context, false);

	gvt_dbg_sched("ring %s workload lrca %x, ctx_id %x, ctx gpa %llx",
		      workload->engine->name, workload->ctx_desc.lrca,
		      workload->ctx_desc.context_id,
		      workload->ring_context_gpa);
194 | |
	/* We only need to ensure this context is not pinned/unpinned during
	 * the period from the last submission to this submission.
	 * Upon reaching this function, the currently submitted context is not
	 * supposed to get unpinned. If a misbehaving guest driver ever does
	 * this, it would corrupt itself.
	 */
201 | if (s->last_ctx[ring_id].valid && |
202 | (s->last_ctx[ring_id].lrca == |
203 | workload->ctx_desc.lrca) && |
204 | (s->last_ctx[ring_id].ring_context_gpa == |
205 | workload->ring_context_gpa)) |
206 | skip = true; |
207 | |
208 | s->last_ctx[ring_id].lrca = workload->ctx_desc.lrca; |
209 | s->last_ctx[ring_id].ring_context_gpa = workload->ring_context_gpa; |
210 | |
211 | if (IS_RESTORE_INHIBIT(shadow_ring_context->ctx_ctrl.val) || skip) |
212 | return 0; |
213 | |
214 | s->last_ctx[ring_id].valid = false; |
215 | context_page_num = workload->engine->context_size; |
216 | context_page_num = context_page_num >> PAGE_SHIFT; |
217 | |
218 | if (IS_BROADWELL(gvt->gt->i915) && workload->engine->id == RCS0) |
219 | context_page_num = 19; |
220 | |
	/* find consecutive GPAs from gma until the first non-consecutive GPA.
	 * read from those consecutive GPAs into dst virtual address
	 */
224 | gpa_size = 0; |
225 | for (i = 2; i < context_page_num; i++) { |
		context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
				(u32)((workload->ctx_desc.lrca + i) <<
				I915_GTT_PAGE_SHIFT));
		if (context_gpa == INTEL_GVT_INVALID_ADDR) {
			gvt_vgpu_err("Invalid guest context descriptor\n");
231 | return -EFAULT; |
232 | } |
233 | |
234 | if (gpa_size == 0) { |
235 | gpa_base = context_gpa; |
236 | dst = context_base + (i << I915_GTT_PAGE_SHIFT); |
237 | } else if (context_gpa != gpa_base + gpa_size) |
238 | goto read; |
239 | |
240 | gpa_size += I915_GTT_PAGE_SIZE; |
241 | |
242 | if (i == context_page_num - 1) |
243 | goto read; |
244 | |
245 | continue; |
246 | |
247 | read: |
		intel_gvt_read_gpa(vgpu, gpa_base, dst, gpa_size);
249 | gpa_base = context_gpa; |
250 | gpa_size = I915_GTT_PAGE_SIZE; |
251 | dst = context_base + (i << I915_GTT_PAGE_SHIFT); |
252 | } |
253 | ret = intel_gvt_scan_engine_context(workload); |
254 | if (ret) { |
255 | gvt_vgpu_err("invalid cmd found in guest context pages\n" ); |
256 | return ret; |
257 | } |
258 | s->last_ctx[ring_id].valid = true; |
259 | return 0; |
260 | } |
261 | |
262 | static inline bool is_gvt_request(struct i915_request *rq) |
263 | { |
	return intel_context_force_single_submission(rq->context);
265 | } |
266 | |
267 | static void save_ring_hw_state(struct intel_vgpu *vgpu, |
268 | const struct intel_engine_cs *engine) |
269 | { |
270 | struct intel_uncore *uncore = engine->uncore; |
271 | i915_reg_t reg; |
272 | |
273 | reg = RING_INSTDONE(engine->mmio_base); |
274 | vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = |
275 | intel_uncore_read(uncore, reg); |
276 | |
277 | reg = RING_ACTHD(engine->mmio_base); |
278 | vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = |
279 | intel_uncore_read(uncore, reg); |
280 | |
281 | reg = RING_ACTHD_UDW(engine->mmio_base); |
282 | vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = |
283 | intel_uncore_read(uncore, reg); |
284 | } |
285 | |
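/*
 * Notifier called by i915 on context schedule in/out/preempt events. It
 * switches the engine MMIO state between host and vGPU owners and tracks
 * whether the shadow context of the current workload is active on hardware.
 */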
286 | static int shadow_context_status_change(struct notifier_block *nb, |
287 | unsigned long action, void *data) |
288 | { |
289 | struct i915_request *rq = data; |
290 | struct intel_gvt *gvt = container_of(nb, struct intel_gvt, |
291 | shadow_ctx_notifier_block[rq->engine->id]); |
292 | struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; |
293 | enum intel_engine_id ring_id = rq->engine->id; |
294 | struct intel_vgpu_workload *workload; |
295 | unsigned long flags; |
296 | |
297 | if (!is_gvt_request(rq)) { |
298 | spin_lock_irqsave(&scheduler->mmio_context_lock, flags); |
299 | if (action == INTEL_CONTEXT_SCHEDULE_IN && |
300 | scheduler->engine_owner[ring_id]) { |
301 | /* Switch ring from vGPU to host. */ |
			intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
					      NULL, rq->engine);
			scheduler->engine_owner[ring_id] = NULL;
		}
		spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags);
307 | |
308 | return NOTIFY_OK; |
309 | } |
310 | |
311 | workload = scheduler->current_workload[ring_id]; |
312 | if (unlikely(!workload)) |
313 | return NOTIFY_OK; |
314 | |
315 | switch (action) { |
316 | case INTEL_CONTEXT_SCHEDULE_IN: |
317 | spin_lock_irqsave(&scheduler->mmio_context_lock, flags); |
318 | if (workload->vgpu != scheduler->engine_owner[ring_id]) { |
319 | /* Switch ring from host to vGPU or vGPU to vGPU. */ |
			intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
					      workload->vgpu, rq->engine);
			scheduler->engine_owner[ring_id] = workload->vgpu;
		} else
			gvt_dbg_sched("skip ring %d mmio switch for vgpu%d\n",
				      ring_id, workload->vgpu->id);
		spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags);
		atomic_set(&workload->shadow_ctx_active, 1);
328 | break; |
329 | case INTEL_CONTEXT_SCHEDULE_OUT: |
		save_ring_hw_state(workload->vgpu, rq->engine);
		atomic_set(&workload->shadow_ctx_active, 0);
332 | break; |
333 | case INTEL_CONTEXT_SCHEDULE_PREEMPTED: |
		save_ring_hw_state(workload->vgpu, rq->engine);
335 | break; |
336 | default: |
337 | WARN_ON(1); |
338 | return NOTIFY_OK; |
339 | } |
340 | wake_up(&workload->shadow_ctx_status_wq); |
341 | return NOTIFY_OK; |
342 | } |
343 | |
344 | static void |
345 | shadow_context_descriptor_update(struct intel_context *ce, |
346 | struct intel_vgpu_workload *workload) |
347 | { |
348 | u64 desc = ce->lrc.desc; |
349 | |
350 | /* |
351 | * Update bits 0-11 of the context descriptor which includes flags |
352 | * like GEN8_CTX_* cached in desc_template |
353 | */ |
354 | desc &= ~(0x3ull << GEN8_CTX_ADDRESSING_MODE_SHIFT); |
355 | desc |= (u64)workload->ctx_desc.addressing_mode << |
356 | GEN8_CTX_ADDRESSING_MODE_SHIFT; |
357 | |
358 | ce->lrc.desc = desc; |
359 | } |
360 | |
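/*
 * Emit the scanned guest ring-buffer commands into the shadow request's
 * ring, preceded by an init breadcrumb so the scheduler can tell whether
 * the request has actually started on the hardware.
 */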
361 | static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) |
362 | { |
363 | struct intel_vgpu *vgpu = workload->vgpu; |
364 | struct i915_request *req = workload->req; |
365 | void *shadow_ring_buffer_va; |
366 | u32 *cs; |
367 | int err; |
368 | |
	if (GRAPHICS_VER(req->engine->i915) == 9 && is_inhibit_context(req->context))
		intel_vgpu_restore_inhibit_context(vgpu, req);
371 | |
	/*
	 * To track whether a request has started on HW, we can emit a
	 * breadcrumb at the beginning of the request and check its
	 * timeline's HWSP to see if the breadcrumb has advanced past the
	 * start of this request. Actually, the request must have the
	 * init_breadcrumb if its timeline has set has_init_breadcrumb, or the
	 * scheduler might get a wrong state of it during reset. Since the
	 * requests from gvt always set the has_init_breadcrumb flag, we need
	 * to do the emit_init_breadcrumb for all the requests here.
	 */
382 | if (req->engine->emit_init_breadcrumb) { |
383 | err = req->engine->emit_init_breadcrumb(req); |
384 | if (err) { |
385 | gvt_vgpu_err("fail to emit init breadcrumb\n" ); |
386 | return err; |
387 | } |
388 | } |
389 | |
390 | /* allocate shadow ring buffer */ |
	cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
	if (IS_ERR(cs)) {
		gvt_vgpu_err("fail to alloc size =%ld shadow ring buffer\n",
			     workload->rb_len);
		return PTR_ERR(cs);
396 | } |
397 | |
398 | shadow_ring_buffer_va = workload->shadow_ring_buffer_va; |
399 | |
400 | /* get shadow ring buffer va */ |
401 | workload->shadow_ring_buffer_va = cs; |
402 | |
403 | memcpy(cs, shadow_ring_buffer_va, |
404 | workload->rb_len); |
405 | |
406 | cs += workload->rb_len / sizeof(u32); |
	intel_ring_advance(workload->req, cs);
408 | |
409 | return 0; |
410 | } |
411 | |
412 | static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) |
413 | { |
414 | if (!wa_ctx->indirect_ctx.obj) |
415 | return; |
416 | |
	i915_gem_object_lock(wa_ctx->indirect_ctx.obj, NULL);
	i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
	i915_gem_object_unlock(wa_ctx->indirect_ctx.obj);
	i915_gem_object_put(wa_ctx->indirect_ctx.obj);
421 | |
422 | wa_ctx->indirect_ctx.obj = NULL; |
423 | wa_ctx->indirect_ctx.shadow_va = NULL; |
424 | } |
425 | |
426 | static void set_dma_address(struct i915_page_directory *pd, dma_addr_t addr) |
427 | { |
428 | struct scatterlist *sg = pd->pt.base->mm.pages->sgl; |
429 | |
430 | /* This is not a good idea */ |
431 | sg->dma_address = addr; |
432 | } |
433 | |
434 | static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, |
435 | struct intel_context *ce) |
436 | { |
437 | struct intel_vgpu_mm *mm = workload->shadow_mm; |
	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
	int i = 0;

	if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
		set_dma_address(ppgtt->pd, mm->ppgtt_mm.shadow_pdps[0]);
	} else {
		for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) {
			struct i915_page_directory * const pd =
				i915_pd_entry(ppgtt->pd, i);
			/* skip now, as the current i915 ppgtt alloc won't
			 * allocate a top-level pdp for a non 4-level table;
			 * this won't impact the shadow ppgtt.
			 */
			if (!pd)
				break;

			set_dma_address(pd, mm->ppgtt_mm.shadow_pdps[i]);
454 | } |
455 | } |
456 | } |
457 | |
458 | static int |
459 | intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload) |
460 | { |
461 | struct intel_vgpu *vgpu = workload->vgpu; |
462 | struct intel_vgpu_submission *s = &vgpu->submission; |
463 | struct i915_request *rq; |
464 | |
465 | if (workload->req) |
466 | return 0; |
467 | |
	rq = i915_request_create(s->shadow[workload->engine->id]);
	if (IS_ERR(rq)) {
		gvt_vgpu_err("fail to allocate gem request\n");
		return PTR_ERR(rq);
	}

	workload->req = i915_request_get(rq);
475 | return 0; |
476 | } |
477 | |
/**
 * intel_gvt_scan_and_shadow_workload - audit the workload by scanning and
 * shadow it as well, including the ringbuffer, wa_ctx and ctx.
 * @workload: an abstract entity for each execlist submission.
 *
 * This function is called before the workload is submitted to i915, to make
 * sure the content of the workload is valid.
 */
486 | int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) |
487 | { |
488 | struct intel_vgpu *vgpu = workload->vgpu; |
489 | struct intel_vgpu_submission *s = &vgpu->submission; |
490 | int ret; |
491 | |
492 | lockdep_assert_held(&vgpu->vgpu_lock); |
493 | |
494 | if (workload->shadow) |
495 | return 0; |
496 | |
	if (!test_and_set_bit(workload->engine->id, s->shadow_ctx_desc_updated))
		shadow_context_descriptor_update(s->shadow[workload->engine->id],
						 workload);
500 | |
501 | ret = intel_gvt_scan_and_shadow_ringbuffer(workload); |
502 | if (ret) |
503 | return ret; |
504 | |
505 | if (workload->engine->id == RCS0 && |
506 | workload->wa_ctx.indirect_ctx.size) { |
		ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
508 | if (ret) |
509 | goto err_shadow; |
510 | } |
511 | |
512 | workload->shadow = true; |
513 | return 0; |
514 | |
515 | err_shadow: |
	release_shadow_wa_ctx(&workload->wa_ctx);
517 | return ret; |
518 | } |
519 | |
520 | static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload); |
521 | |
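/*
 * For each shadow batch buffer of the workload: pin GGTT-based (privileged)
 * buffers, relocate the scanned MI_BATCH_BUFFER_START command to the shadow
 * copy and track the VMA against the workload's request; PPGTT-based
 * buffers are left pointing at the guest's original batch.
 */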
522 | static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) |
523 | { |
524 | struct intel_gvt *gvt = workload->vgpu->gvt; |
525 | const int gmadr_bytes = gvt->device_info.gmadr_bytes_in_cmd; |
526 | struct intel_vgpu_shadow_bb *bb; |
527 | struct i915_gem_ww_ctx ww; |
528 | int ret; |
529 | |
530 | list_for_each_entry(bb, &workload->shadow_bb, list) { |
		/* For privileged batch buffers (not wa_ctx), bb_start_cmd_va
		 * only points into ring_scan_buffer, not the real ring
		 * address allocated in the later copy_workload_to_ring_buffer.
		 * Please note that shadow_ring_buffer_va now points to the
		 * real ring buffer va in copy_workload_to_ring_buffer.
		 */
537 | |
538 | if (bb->bb_offset) |
539 | bb->bb_start_cmd_va = workload->shadow_ring_buffer_va |
540 | + bb->bb_offset; |
541 | |
542 | /* |
543 | * For non-priv bb, scan&shadow is only for |
544 | * debugging purpose, so the content of shadow bb |
545 | * is the same as original bb. Therefore, |
546 | * here, rather than switch to shadow bb's gma |
547 | * address, we directly use original batch buffer's |
548 | * gma address, and send original bb to hardware |
549 | * directly |
550 | */ |
551 | if (!bb->ppgtt) { |
			i915_gem_ww_ctx_init(&ww, false);
retry:
			i915_gem_object_lock(bb->obj, &ww);

			bb->vma = i915_gem_object_ggtt_pin_ww(bb->obj, &ww,
							      NULL, 0, 0, 0);
			if (IS_ERR(bb->vma)) {
				ret = PTR_ERR(bb->vma);
				if (ret == -EDEADLK) {
					ret = i915_gem_ww_ctx_backoff(&ww);
					if (!ret)
						goto retry;
564 | } |
565 | goto err; |
566 | } |
567 | |
568 | /* relocate shadow batch buffer */ |
			bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
570 | if (gmadr_bytes == 8) |
571 | bb->bb_start_cmd_va[2] = 0; |
572 | |
			ret = i915_vma_move_to_active(bb->vma, workload->req,
574 | __EXEC_OBJECT_NO_REQUEST_AWAIT); |
575 | if (ret) |
576 | goto err; |
577 | |
578 | /* No one is going to touch shadow bb from now on. */ |
			i915_gem_object_flush_map(bb->obj);
			i915_gem_ww_ctx_fini(&ww);
581 | } |
582 | } |
583 | return 0; |
584 | err: |
	i915_gem_ww_ctx_fini(&ww);
586 | release_shadow_batch_buffer(workload); |
587 | return ret; |
588 | } |
589 | |
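/*
 * Point the per-context and indirect-context pointers in the shadow ring
 * context at the shadowed wa_ctx copies.
 */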
590 | static void update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) |
591 | { |
592 | struct intel_vgpu_workload *workload = |
593 | container_of(wa_ctx, struct intel_vgpu_workload, wa_ctx); |
594 | struct i915_request *rq = workload->req; |
595 | struct execlist_ring_context *shadow_ring_context = |
596 | (struct execlist_ring_context *)rq->context->lrc_reg_state; |
597 | |
598 | shadow_ring_context->bb_per_ctx_ptr.val = |
599 | (shadow_ring_context->bb_per_ctx_ptr.val & |
600 | (~PER_CTX_ADDR_MASK)) | wa_ctx->per_ctx.shadow_gma; |
601 | shadow_ring_context->rcs_indirect_ctx.val = |
602 | (shadow_ring_context->rcs_indirect_ctx.val & |
603 | (~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma; |
604 | } |
605 | |
606 | static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) |
607 | { |
608 | struct i915_vma *vma; |
609 | unsigned char *per_ctx_va = |
610 | (unsigned char *)wa_ctx->indirect_ctx.shadow_va + |
611 | wa_ctx->indirect_ctx.size; |
612 | struct i915_gem_ww_ctx ww; |
613 | int ret; |
614 | |
615 | if (wa_ctx->indirect_ctx.size == 0) |
616 | return 0; |
617 | |
618 | i915_gem_ww_ctx_init(ctx: &ww, intr: false); |
619 | retry: |
620 | i915_gem_object_lock(obj: wa_ctx->indirect_ctx.obj, ww: &ww); |
621 | |
622 | vma = i915_gem_object_ggtt_pin_ww(obj: wa_ctx->indirect_ctx.obj, ww: &ww, NULL, |
623 | size: 0, CACHELINE_BYTES, flags: 0); |
624 | if (IS_ERR(ptr: vma)) { |
625 | ret = PTR_ERR(ptr: vma); |
626 | if (ret == -EDEADLK) { |
627 | ret = i915_gem_ww_ctx_backoff(ctx: &ww); |
628 | if (!ret) |
629 | goto retry; |
630 | } |
631 | return ret; |
632 | } |
633 | |
634 | i915_gem_ww_ctx_fini(ctx: &ww); |
635 | |
636 | /* FIXME: we are not tracking our pinned VMA leaving it |
637 | * up to the core to fix up the stray pin_count upon |
638 | * free. |
639 | */ |
640 | |
641 | wa_ctx->indirect_ctx.shadow_gma = i915_ggtt_offset(vma); |
642 | |
643 | wa_ctx->per_ctx.shadow_gma = *((unsigned int *)per_ctx_va + 1); |
644 | memset(per_ctx_va, 0, CACHELINE_BYTES); |
645 | |
646 | update_wa_ctx_2_shadow_ctx(wa_ctx); |
647 | return 0; |
648 | } |
649 | |
650 | static void update_vreg_in_ctx(struct intel_vgpu_workload *workload) |
651 | { |
652 | vgpu_vreg_t(workload->vgpu, RING_START(workload->engine->mmio_base)) = |
653 | workload->rb_start; |
654 | } |
655 | |
656 | static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) |
657 | { |
658 | struct intel_vgpu_shadow_bb *bb, *pos; |
659 | |
660 | if (list_empty(head: &workload->shadow_bb)) |
661 | return; |
662 | |
663 | bb = list_first_entry(&workload->shadow_bb, |
664 | struct intel_vgpu_shadow_bb, list); |
665 | |
666 | list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) { |
667 | if (bb->obj) { |
668 | i915_gem_object_lock(obj: bb->obj, NULL); |
669 | if (bb->va && !IS_ERR(ptr: bb->va)) |
670 | i915_gem_object_unpin_map(obj: bb->obj); |
671 | |
672 | if (bb->vma && !IS_ERR(ptr: bb->vma)) |
673 | i915_vma_unpin(vma: bb->vma); |
674 | |
675 | i915_gem_object_unlock(obj: bb->obj); |
676 | i915_gem_object_put(obj: bb->obj); |
677 | } |
678 | list_del(entry: &bb->list); |
679 | kfree(objp: bb); |
680 | } |
681 | } |
682 | |
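/*
 * Pin the workload's shadow PPGTT, plus any PPGTTs switched to by LRI
 * commands inside the workload, so their shadow page tables stay valid
 * while the workload runs; unwind all pins on failure.
 */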
683 | static int |
684 | intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload) |
685 | { |
686 | struct intel_vgpu *vgpu = workload->vgpu; |
687 | struct intel_vgpu_mm *m; |
688 | int ret = 0; |
689 | |
690 | ret = intel_vgpu_pin_mm(mm: workload->shadow_mm); |
691 | if (ret) { |
692 | gvt_vgpu_err("fail to vgpu pin mm\n" ); |
693 | return ret; |
694 | } |
695 | |
696 | if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT || |
697 | !workload->shadow_mm->ppgtt_mm.shadowed) { |
698 | intel_vgpu_unpin_mm(mm: workload->shadow_mm); |
699 | gvt_vgpu_err("workload shadow ppgtt isn't ready\n" ); |
700 | return -EINVAL; |
701 | } |
702 | |
703 | if (!list_empty(head: &workload->lri_shadow_mm)) { |
704 | list_for_each_entry(m, &workload->lri_shadow_mm, |
705 | ppgtt_mm.link) { |
706 | ret = intel_vgpu_pin_mm(mm: m); |
707 | if (ret) { |
708 | list_for_each_entry_from_reverse(m, |
709 | &workload->lri_shadow_mm, |
710 | ppgtt_mm.link) |
711 | intel_vgpu_unpin_mm(mm: m); |
712 | gvt_vgpu_err("LRI shadow ppgtt fail to pin\n" ); |
713 | break; |
714 | } |
715 | } |
716 | } |
717 | |
718 | if (ret) |
719 | intel_vgpu_unpin_mm(mm: workload->shadow_mm); |
720 | |
721 | return ret; |
722 | } |
723 | |
724 | static void |
725 | intel_vgpu_shadow_mm_unpin(struct intel_vgpu_workload *workload) |
726 | { |
727 | struct intel_vgpu_mm *m; |
728 | |
	if (!list_empty(&workload->lri_shadow_mm)) {
		list_for_each_entry(m, &workload->lri_shadow_mm,
				    ppgtt_mm.link)
			intel_vgpu_unpin_mm(m);
	}
	intel_vgpu_unpin_mm(workload->shadow_mm);
735 | } |
736 | |
737 | static int prepare_workload(struct intel_vgpu_workload *workload) |
738 | { |
739 | struct intel_vgpu *vgpu = workload->vgpu; |
740 | struct intel_vgpu_submission *s = &vgpu->submission; |
741 | int ret = 0; |
742 | |
743 | ret = intel_vgpu_shadow_mm_pin(workload); |
744 | if (ret) { |
745 | gvt_vgpu_err("fail to pin shadow mm\n" ); |
746 | return ret; |
747 | } |
748 | |
749 | update_shadow_pdps(workload); |
750 | |
751 | set_context_ppgtt_from_shadow(workload, ce: s->shadow[workload->engine->id]); |
752 | |
753 | ret = intel_vgpu_sync_oos_pages(vgpu: workload->vgpu); |
754 | if (ret) { |
755 | gvt_vgpu_err("fail to vgpu sync oos pages\n" ); |
756 | goto err_unpin_mm; |
757 | } |
758 | |
759 | ret = intel_vgpu_flush_post_shadow(vgpu: workload->vgpu); |
760 | if (ret) { |
761 | gvt_vgpu_err("fail to flush post shadow\n" ); |
762 | goto err_unpin_mm; |
763 | } |
764 | |
765 | ret = copy_workload_to_ring_buffer(workload); |
766 | if (ret) { |
767 | gvt_vgpu_err("fail to generate request\n" ); |
768 | goto err_unpin_mm; |
769 | } |
770 | |
771 | ret = prepare_shadow_batch_buffer(workload); |
772 | if (ret) { |
773 | gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n" ); |
774 | goto err_unpin_mm; |
775 | } |
776 | |
777 | ret = prepare_shadow_wa_ctx(wa_ctx: &workload->wa_ctx); |
778 | if (ret) { |
779 | gvt_vgpu_err("fail to prepare_shadow_wa_ctx\n" ); |
780 | goto err_shadow_batch; |
781 | } |
782 | |
783 | if (workload->prepare) { |
784 | ret = workload->prepare(workload); |
785 | if (ret) |
786 | goto err_shadow_wa_ctx; |
787 | } |
788 | |
789 | return 0; |
790 | err_shadow_wa_ctx: |
791 | release_shadow_wa_ctx(wa_ctx: &workload->wa_ctx); |
792 | err_shadow_batch: |
793 | release_shadow_batch_buffer(workload); |
794 | err_unpin_mm: |
795 | intel_vgpu_shadow_mm_unpin(workload); |
796 | return ret; |
797 | } |
798 | |
799 | static int dispatch_workload(struct intel_vgpu_workload *workload) |
800 | { |
801 | struct intel_vgpu *vgpu = workload->vgpu; |
802 | struct i915_request *rq; |
803 | int ret; |
804 | |
805 | gvt_dbg_sched("ring id %s prepare to dispatch workload %p\n" , |
806 | workload->engine->name, workload); |
807 | |
808 | mutex_lock(&vgpu->vgpu_lock); |
809 | |
810 | ret = intel_gvt_workload_req_alloc(workload); |
811 | if (ret) |
812 | goto err_req; |
813 | |
814 | ret = intel_gvt_scan_and_shadow_workload(workload); |
815 | if (ret) |
816 | goto out; |
817 | |
818 | ret = populate_shadow_context(workload); |
819 | if (ret) { |
820 | release_shadow_wa_ctx(wa_ctx: &workload->wa_ctx); |
821 | goto out; |
822 | } |
823 | |
824 | ret = prepare_workload(workload); |
825 | out: |
826 | if (ret) { |
827 | /* We might still need to add request with |
828 | * clean ctx to retire it properly.. |
829 | */ |
830 | rq = fetch_and_zero(&workload->req); |
831 | i915_request_put(rq); |
832 | } |
833 | |
834 | if (!IS_ERR_OR_NULL(ptr: workload->req)) { |
835 | gvt_dbg_sched("ring id %s submit workload to i915 %p\n" , |
836 | workload->engine->name, workload->req); |
837 | i915_request_add(rq: workload->req); |
838 | workload->dispatched = true; |
839 | } |
840 | err_req: |
841 | if (ret) |
842 | workload->status = ret; |
843 | mutex_unlock(lock: &vgpu->vgpu_lock); |
844 | return ret; |
845 | } |
846 | |
847 | static struct intel_vgpu_workload * |
848 | pick_next_workload(struct intel_gvt *gvt, struct intel_engine_cs *engine) |
849 | { |
850 | struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; |
851 | struct intel_vgpu_workload *workload = NULL; |
852 | |
853 | mutex_lock(&gvt->sched_lock); |
854 | |
855 | /* |
856 | * no current vgpu / will be scheduled out / no workload |
857 | * bail out |
858 | */ |
859 | if (!scheduler->current_vgpu) { |
860 | gvt_dbg_sched("ring %s stop - no current vgpu\n" , engine->name); |
861 | goto out; |
862 | } |
863 | |
864 | if (scheduler->need_reschedule) { |
865 | gvt_dbg_sched("ring %s stop - will reschedule\n" , engine->name); |
866 | goto out; |
867 | } |
868 | |
869 | if (!test_bit(INTEL_VGPU_STATUS_ACTIVE, |
870 | scheduler->current_vgpu->status) || |
871 | list_empty(workload_q_head(scheduler->current_vgpu, engine))) |
872 | goto out; |
873 | |
	/*
	 * still have current workload, maybe the workload dispatcher
	 * failed to submit it for some reason; resubmit it.
	 */
878 | if (scheduler->current_workload[engine->id]) { |
879 | workload = scheduler->current_workload[engine->id]; |
880 | gvt_dbg_sched("ring %s still have current workload %p\n" , |
881 | engine->name, workload); |
882 | goto out; |
883 | } |
884 | |
	/*
	 * pick a workload as current workload
	 * once current workload is set, schedule policy routines
	 * will wait until the current workload is finished when trying to
	 * schedule out a vgpu.
	 */
891 | scheduler->current_workload[engine->id] = |
892 | list_first_entry(workload_q_head(scheduler->current_vgpu, |
893 | engine), |
894 | struct intel_vgpu_workload, list); |
895 | |
896 | workload = scheduler->current_workload[engine->id]; |
897 | |
898 | gvt_dbg_sched("ring %s pick new workload %p\n" , engine->name, workload); |
899 | |
900 | atomic_inc(v: &workload->vgpu->submission.running_workload_num); |
901 | out: |
902 | mutex_unlock(lock: &gvt->sched_lock); |
903 | return workload; |
904 | } |
905 | |
906 | static void update_guest_pdps(struct intel_vgpu *vgpu, |
907 | u64 ring_context_gpa, u32 pdp[8]) |
908 | { |
909 | u64 gpa; |
910 | int i; |
911 | |
912 | gpa = ring_context_gpa + RING_CTX_OFF(pdps[0].val); |
913 | |
914 | for (i = 0; i < 8; i++) |
		intel_gvt_write_gpa(vgpu, gpa + i * 8, &pdp[7 - i], 4);
916 | } |
917 | |
918 | static __maybe_unused bool |
919 | check_shadow_context_ppgtt(struct execlist_ring_context *c, struct intel_vgpu_mm *m) |
920 | { |
921 | if (m->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { |
922 | u64 shadow_pdp = c->pdps[7].val | (u64) c->pdps[6].val << 32; |
923 | |
924 | if (shadow_pdp != m->ppgtt_mm.shadow_pdps[0]) { |
925 | gvt_dbg_mm("4-level context ppgtt not match LRI command\n" ); |
926 | return false; |
927 | } |
928 | return true; |
929 | } else { |
930 | /* see comment in LRI handler in cmd_parser.c */ |
931 | gvt_dbg_mm("invalid shadow mm type\n" ); |
932 | return false; |
933 | } |
934 | } |
935 | |
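/*
 * Propagate the results of a completed workload back to the guest: update
 * the vGPU's ring head/tail registers and copy the shadow engine context
 * pages and selected ring-context registers back into guest memory.
 */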
936 | static void update_guest_context(struct intel_vgpu_workload *workload) |
937 | { |
938 | struct i915_request *rq = workload->req; |
939 | struct intel_vgpu *vgpu = workload->vgpu; |
940 | struct execlist_ring_context *shadow_ring_context; |
941 | struct intel_context *ctx = workload->req->context; |
942 | void *context_base; |
943 | void *src; |
944 | unsigned long context_gpa, context_page_num; |
945 | unsigned long gpa_base; /* first gpa of consecutive GPAs */ |
946 | unsigned long gpa_size; /* size of consecutive GPAs*/ |
947 | int i; |
948 | u32 ring_base; |
949 | u32 head, tail; |
950 | u16 wrap_count; |
951 | |
952 | gvt_dbg_sched("ring id %d workload lrca %x\n" , rq->engine->id, |
953 | workload->ctx_desc.lrca); |
954 | |
955 | GEM_BUG_ON(!intel_context_is_pinned(ctx)); |
956 | |
957 | head = workload->rb_head; |
958 | tail = workload->rb_tail; |
959 | wrap_count = workload->guest_rb_head >> RB_HEAD_WRAP_CNT_OFF; |
960 | |
961 | if (tail < head) { |
962 | if (wrap_count == RB_HEAD_WRAP_CNT_MAX) |
963 | wrap_count = 0; |
964 | else |
965 | wrap_count += 1; |
966 | } |
967 | |
968 | head = (wrap_count << RB_HEAD_WRAP_CNT_OFF) | tail; |
969 | |
970 | ring_base = rq->engine->mmio_base; |
971 | vgpu_vreg_t(vgpu, RING_TAIL(ring_base)) = tail; |
972 | vgpu_vreg_t(vgpu, RING_HEAD(ring_base)) = head; |
973 | |
974 | context_page_num = rq->engine->context_size; |
975 | context_page_num = context_page_num >> PAGE_SHIFT; |
976 | |
977 | if (IS_BROADWELL(rq->i915) && rq->engine->id == RCS0) |
978 | context_page_num = 19; |
979 | |
980 | context_base = (void *) ctx->lrc_reg_state - |
981 | (LRC_STATE_PN << I915_GTT_PAGE_SHIFT); |
982 | |
	/* find consecutive GPAs from gma until the first non-consecutive GPA.
	 * write to those consecutive GPAs from src virtual address
	 */
986 | gpa_size = 0; |
987 | for (i = 2; i < context_page_num; i++) { |
988 | context_gpa = intel_vgpu_gma_to_gpa(mm: vgpu->gtt.ggtt_mm, |
989 | gma: (u32)((workload->ctx_desc.lrca + i) << |
990 | I915_GTT_PAGE_SHIFT)); |
991 | if (context_gpa == INTEL_GVT_INVALID_ADDR) { |
992 | gvt_vgpu_err("invalid guest context descriptor\n" ); |
993 | return; |
994 | } |
995 | |
996 | if (gpa_size == 0) { |
997 | gpa_base = context_gpa; |
998 | src = context_base + (i << I915_GTT_PAGE_SHIFT); |
999 | } else if (context_gpa != gpa_base + gpa_size) |
1000 | goto write; |
1001 | |
1002 | gpa_size += I915_GTT_PAGE_SIZE; |
1003 | |
1004 | if (i == context_page_num - 1) |
1005 | goto write; |
1006 | |
1007 | continue; |
1008 | |
1009 | write: |
1010 | intel_gvt_write_gpa(vgpu, gpa: gpa_base, buf: src, len: gpa_size); |
1011 | gpa_base = context_gpa; |
1012 | gpa_size = I915_GTT_PAGE_SIZE; |
1013 | src = context_base + (i << I915_GTT_PAGE_SHIFT); |
1014 | } |
1015 | |
1016 | intel_gvt_write_gpa(vgpu, gpa: workload->ring_context_gpa + |
1017 | RING_CTX_OFF(ring_header.val), buf: &workload->rb_tail, len: 4); |
1018 | |
1019 | shadow_ring_context = (void *) ctx->lrc_reg_state; |
1020 | |
1021 | if (!list_empty(head: &workload->lri_shadow_mm)) { |
1022 | struct intel_vgpu_mm *m = list_last_entry(&workload->lri_shadow_mm, |
1023 | struct intel_vgpu_mm, |
1024 | ppgtt_mm.link); |
1025 | GEM_BUG_ON(!check_shadow_context_ppgtt(shadow_ring_context, m)); |
1026 | update_guest_pdps(vgpu, ring_context_gpa: workload->ring_context_gpa, |
1027 | pdp: (void *)m->ppgtt_mm.guest_pdps); |
1028 | } |
1029 | |
1030 | #define COPY_REG(name) \ |
1031 | intel_gvt_write_gpa(vgpu, workload->ring_context_gpa + \ |
1032 | RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4) |
1033 | |
1034 | COPY_REG(ctx_ctrl); |
1035 | COPY_REG(ctx_timestamp); |
1036 | |
1037 | #undef COPY_REG |
1038 | |
1039 | intel_gvt_write_gpa(vgpu, |
1040 | gpa: workload->ring_context_gpa + |
1041 | sizeof(*shadow_ring_context), |
1042 | buf: (void *)shadow_ring_context + |
1043 | sizeof(*shadow_ring_context), |
1044 | I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); |
1045 | } |
1046 | |
1047 | void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu, |
1048 | intel_engine_mask_t engine_mask) |
1049 | { |
1050 | struct intel_vgpu_submission *s = &vgpu->submission; |
1051 | struct intel_engine_cs *engine; |
1052 | struct intel_vgpu_workload *pos, *n; |
1053 | intel_engine_mask_t tmp; |
1054 | |
	/* free the unsubmitted workloads in the queues. */
1056 | for_each_engine_masked(engine, vgpu->gvt->gt, engine_mask, tmp) { |
1057 | list_for_each_entry_safe(pos, n, |
1058 | &s->workload_q_head[engine->id], list) { |
			list_del_init(&pos->list);
			intel_vgpu_destroy_workload(pos);
		}
		clear_bit(engine->id, s->shadow_ctx_desc_updated);
1063 | } |
1064 | } |
1065 | |
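/*
 * Finish the current workload on a ring: wait for the shadow context to be
 * scheduled out, propagate the completion status and context state back to
 * the guest, and release the resources held by the workload.
 */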
1066 | static void complete_current_workload(struct intel_gvt *gvt, int ring_id) |
1067 | { |
1068 | struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; |
1069 | struct intel_vgpu_workload *workload = |
1070 | scheduler->current_workload[ring_id]; |
1071 | struct intel_vgpu *vgpu = workload->vgpu; |
1072 | struct intel_vgpu_submission *s = &vgpu->submission; |
1073 | struct i915_request *rq = workload->req; |
1074 | int event; |
1075 | |
1076 | mutex_lock(&vgpu->vgpu_lock); |
1077 | mutex_lock(&gvt->sched_lock); |
1078 | |
	/* For the workload w/ request, we need to wait for the context
	 * switch to make sure the request is completed.
	 * For the workload w/o request, directly complete the workload.
	 */
1083 | if (rq) { |
1084 | wait_event(workload->shadow_ctx_status_wq, |
1085 | !atomic_read(&workload->shadow_ctx_active)); |
1086 | |
		/* If this request caused a GPU hang, req->fence.error will
		 * be set to -EIO. Use -EIO as the workload status so that,
		 * when this request caused a GPU hang, we do not trigger a
		 * context switch interrupt to the guest.
		 */
1092 | if (likely(workload->status == -EINPROGRESS)) { |
1093 | if (workload->req->fence.error == -EIO) |
1094 | workload->status = -EIO; |
1095 | else |
1096 | workload->status = 0; |
1097 | } |
1098 | |
1099 | if (!workload->status && |
1100 | !(vgpu->resetting_eng & BIT(ring_id))) { |
1101 | update_guest_context(workload); |
1102 | |
1103 | for_each_set_bit(event, workload->pending_events, |
1104 | INTEL_GVT_EVENT_MAX) |
1105 | intel_vgpu_trigger_virtual_event(vgpu, event); |
1106 | } |
1107 | |
1108 | i915_request_put(fetch_and_zero(&workload->req)); |
1109 | } |
1110 | |
1111 | gvt_dbg_sched("ring id %d complete workload %p status %d\n" , |
1112 | ring_id, workload, workload->status); |
1113 | |
1114 | scheduler->current_workload[ring_id] = NULL; |
1115 | |
1116 | list_del_init(entry: &workload->list); |
1117 | |
1118 | if (workload->status || vgpu->resetting_eng & BIT(ring_id)) { |
		/* If workload->status is not successful, it means the HW GPU
		 * hit a GPU hang or something went wrong with i915/GVT, and
		 * GVT won't inject a context switch interrupt to the guest.
		 * So from the guest's point of view this error is actually a
		 * vGPU hang, and accordingly we should emulate a vGPU hang:
		 * if there are pending workloads which were already submitted
		 * from the guest, we should clean them up like the HW GPU
		 * would do.
		 *
		 * If we are in the middle of an engine reset, the pending
		 * workloads won't be submitted to the HW GPU and will be
		 * cleaned up later during the reset process, so doing the
		 * workload cleanup here doesn't have any impact.
		 */
1132 | intel_vgpu_clean_workloads(vgpu, BIT(ring_id)); |
1133 | } |
1134 | |
1135 | workload->complete(workload); |
1136 | |
1137 | intel_vgpu_shadow_mm_unpin(workload); |
1138 | intel_vgpu_destroy_workload(workload); |
1139 | |
1140 | atomic_dec(v: &s->running_workload_num); |
1141 | wake_up(&scheduler->workload_complete_wq); |
1142 | |
1143 | if (gvt->scheduler.need_reschedule) |
1144 | intel_gvt_request_service(gvt, service: INTEL_GVT_REQUEST_EVENT_SCHED); |
1145 | |
1146 | mutex_unlock(lock: &gvt->sched_lock); |
1147 | mutex_unlock(lock: &vgpu->vgpu_lock); |
1148 | } |
1149 | |
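/*
 * Per-engine kernel thread: picks the next vGPU workload for this engine,
 * dispatches it to i915 and waits for it to complete before handling the
 * next one.
 */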
1150 | static int workload_thread(void *arg) |
1151 | { |
1152 | struct intel_engine_cs *engine = arg; |
1153 | const bool need_force_wake = GRAPHICS_VER(engine->i915) >= 9; |
1154 | struct intel_gvt *gvt = engine->i915->gvt; |
1155 | struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; |
1156 | struct intel_vgpu_workload *workload = NULL; |
1157 | struct intel_vgpu *vgpu = NULL; |
1158 | int ret; |
1159 | DEFINE_WAIT_FUNC(wait, woken_wake_function); |
1160 | |
1161 | gvt_dbg_core("workload thread for ring %s started\n" , engine->name); |
1162 | |
1163 | while (!kthread_should_stop()) { |
1164 | intel_wakeref_t wakeref; |
1165 | |
1166 | add_wait_queue(wq_head: &scheduler->waitq[engine->id], wq_entry: &wait); |
1167 | do { |
1168 | workload = pick_next_workload(gvt, engine); |
1169 | if (workload) |
1170 | break; |
1171 | wait_woken(wq_entry: &wait, TASK_INTERRUPTIBLE, |
1172 | MAX_SCHEDULE_TIMEOUT); |
1173 | } while (!kthread_should_stop()); |
1174 | remove_wait_queue(wq_head: &scheduler->waitq[engine->id], wq_entry: &wait); |
1175 | |
1176 | if (!workload) |
1177 | break; |
1178 | |
1179 | gvt_dbg_sched("ring %s next workload %p vgpu %d\n" , |
1180 | engine->name, workload, |
1181 | workload->vgpu->id); |
1182 | |
1183 | wakeref = intel_runtime_pm_get(rpm: engine->uncore->rpm); |
1184 | |
1185 | gvt_dbg_sched("ring %s will dispatch workload %p\n" , |
1186 | engine->name, workload); |
1187 | |
1188 | if (need_force_wake) |
1189 | intel_uncore_forcewake_get(uncore: engine->uncore, |
1190 | domains: FORCEWAKE_ALL); |
1191 | /* |
1192 | * Update the vReg of the vGPU which submitted this |
1193 | * workload. The vGPU may use these registers for checking |
1194 | * the context state. The value comes from GPU commands |
1195 | * in this workload. |
1196 | */ |
1197 | update_vreg_in_ctx(workload); |
1198 | |
1199 | ret = dispatch_workload(workload); |
1200 | |
1201 | if (ret) { |
1202 | vgpu = workload->vgpu; |
1203 | gvt_vgpu_err("fail to dispatch workload, skip\n" ); |
1204 | goto complete; |
1205 | } |
1206 | |
1207 | gvt_dbg_sched("ring %s wait workload %p\n" , |
1208 | engine->name, workload); |
1209 | i915_request_wait(rq: workload->req, flags: 0, MAX_SCHEDULE_TIMEOUT); |
1210 | |
1211 | complete: |
1212 | gvt_dbg_sched("will complete workload %p, status: %d\n" , |
1213 | workload, workload->status); |
1214 | |
1215 | complete_current_workload(gvt, ring_id: engine->id); |
1216 | |
1217 | if (need_force_wake) |
1218 | intel_uncore_forcewake_put(uncore: engine->uncore, |
1219 | domains: FORCEWAKE_ALL); |
1220 | |
1221 | intel_runtime_pm_put(rpm: engine->uncore->rpm, wref: wakeref); |
1222 | if (ret && (vgpu_is_vm_unhealthy(ret))) |
1223 | enter_failsafe_mode(vgpu, reason: GVT_FAILSAFE_GUEST_ERR); |
1224 | } |
1225 | return 0; |
1226 | } |
1227 | |
1228 | void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu) |
1229 | { |
1230 | struct intel_vgpu_submission *s = &vgpu->submission; |
1231 | struct intel_gvt *gvt = vgpu->gvt; |
1232 | struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; |
1233 | |
1234 | if (atomic_read(v: &s->running_workload_num)) { |
1235 | gvt_dbg_sched("wait vgpu idle\n" ); |
1236 | |
1237 | wait_event(scheduler->workload_complete_wq, |
1238 | !atomic_read(&s->running_workload_num)); |
1239 | } |
1240 | } |
1241 | |
1242 | void intel_gvt_clean_workload_scheduler(struct intel_gvt *gvt) |
1243 | { |
1244 | struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; |
1245 | struct intel_engine_cs *engine; |
1246 | enum intel_engine_id i; |
1247 | |
1248 | gvt_dbg_core("clean workload scheduler\n" ); |
1249 | |
1250 | for_each_engine(engine, gvt->gt, i) { |
1251 | atomic_notifier_chain_unregister( |
1252 | nh: &engine->context_status_notifier, |
1253 | nb: &gvt->shadow_ctx_notifier_block[i]); |
1254 | kthread_stop(k: scheduler->thread[i]); |
1255 | } |
1256 | } |
1257 | |
1258 | int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt) |
1259 | { |
1260 | struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; |
1261 | struct intel_engine_cs *engine; |
1262 | enum intel_engine_id i; |
1263 | int ret; |
1264 | |
1265 | gvt_dbg_core("init workload scheduler\n" ); |
1266 | |
1267 | init_waitqueue_head(&scheduler->workload_complete_wq); |
1268 | |
1269 | for_each_engine(engine, gvt->gt, i) { |
1270 | init_waitqueue_head(&scheduler->waitq[i]); |
1271 | |
1272 | scheduler->thread[i] = kthread_run(workload_thread, engine, |
1273 | "gvt:%s" , engine->name); |
1274 | if (IS_ERR(ptr: scheduler->thread[i])) { |
1275 | gvt_err("fail to create workload thread\n" ); |
1276 | ret = PTR_ERR(ptr: scheduler->thread[i]); |
1277 | goto err; |
1278 | } |
1279 | |
1280 | gvt->shadow_ctx_notifier_block[i].notifier_call = |
1281 | shadow_context_status_change; |
1282 | atomic_notifier_chain_register(nh: &engine->context_status_notifier, |
1283 | nb: &gvt->shadow_ctx_notifier_block[i]); |
1284 | } |
1285 | |
1286 | return 0; |
1287 | |
1288 | err: |
1289 | intel_gvt_clean_workload_scheduler(gvt); |
1290 | return ret; |
1291 | } |
1292 | |
1293 | static void |
1294 | i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s, |
1295 | struct i915_ppgtt *ppgtt) |
1296 | { |
1297 | int i; |
1298 | |
	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		set_dma_address(ppgtt->pd, s->i915_context_pml4);
	} else {
		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
			struct i915_page_directory * const pd =
				i915_pd_entry(ppgtt->pd, i);

			set_dma_address(pd, s->i915_context_pdps[i]);
1307 | } |
1308 | } |
1309 | } |
1310 | |
1311 | /** |
1312 | * intel_vgpu_clean_submission - free submission-related resource for vGPU |
1313 | * @vgpu: a vGPU |
1314 | * |
1315 | * This function is called when a vGPU is being destroyed. |
1316 | * |
1317 | */ |
1318 | void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) |
1319 | { |
1320 | struct intel_vgpu_submission *s = &vgpu->submission; |
1321 | struct intel_engine_cs *engine; |
1322 | enum intel_engine_id id; |
1323 | |
1324 | intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, interface: 0); |
1325 | |
1326 | i915_context_ppgtt_root_restore(s, ppgtt: i915_vm_to_ppgtt(vm: s->shadow[0]->vm)); |
1327 | for_each_engine(engine, vgpu->gvt->gt, id) |
1328 | intel_context_put(ce: s->shadow[id]); |
1329 | |
1330 | kmem_cache_destroy(s: s->workloads); |
1331 | } |
1332 | |
1333 | |
1334 | /** |
1335 | * intel_vgpu_reset_submission - reset submission-related resource for vGPU |
1336 | * @vgpu: a vGPU |
1337 | * @engine_mask: engines expected to be reset |
1338 | * |
 * This function is called when a vGPU is being reset.
1340 | * |
1341 | */ |
1342 | void intel_vgpu_reset_submission(struct intel_vgpu *vgpu, |
1343 | intel_engine_mask_t engine_mask) |
1344 | { |
1345 | struct intel_vgpu_submission *s = &vgpu->submission; |
1346 | |
1347 | if (!s->active) |
1348 | return; |
1349 | |
1350 | intel_vgpu_clean_workloads(vgpu, engine_mask); |
1351 | s->ops->reset(vgpu, engine_mask); |
1352 | } |
1353 | |
1354 | static void |
1355 | i915_context_ppgtt_root_save(struct intel_vgpu_submission *s, |
1356 | struct i915_ppgtt *ppgtt) |
1357 | { |
1358 | int i; |
1359 | |
	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		s->i915_context_pml4 = px_dma(ppgtt->pd);
	} else {
		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
			struct i915_page_directory * const pd =
				i915_pd_entry(ppgtt->pd, i);
1366 | |
1367 | s->i915_context_pdps[i] = px_dma(pd); |
1368 | } |
1369 | } |
1370 | } |
1371 | |
1372 | /** |
1373 | * intel_vgpu_setup_submission - setup submission-related resource for vGPU |
1374 | * @vgpu: a vGPU |
1375 | * |
1376 | * This function is called when a vGPU is being created. |
1377 | * |
1378 | * Returns: |
1379 | * Zero on success, negative error code if failed. |
1380 | * |
1381 | */ |
1382 | int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) |
1383 | { |
1384 | struct drm_i915_private *i915 = vgpu->gvt->gt->i915; |
1385 | struct intel_vgpu_submission *s = &vgpu->submission; |
1386 | struct intel_engine_cs *engine; |
1387 | struct i915_ppgtt *ppgtt; |
1388 | enum intel_engine_id i; |
1389 | int ret; |
1390 | |
1391 | ppgtt = i915_ppgtt_create(gt: to_gt(i915), I915_BO_ALLOC_PM_EARLY); |
1392 | if (IS_ERR(ptr: ppgtt)) |
1393 | return PTR_ERR(ptr: ppgtt); |
1394 | |
1395 | i915_context_ppgtt_root_save(s, ppgtt); |
1396 | |
1397 | for_each_engine(engine, vgpu->gvt->gt, i) { |
1398 | struct intel_context *ce; |
1399 | |
1400 | INIT_LIST_HEAD(list: &s->workload_q_head[i]); |
1401 | s->shadow[i] = ERR_PTR(error: -EINVAL); |
1402 | |
1403 | ce = intel_context_create(engine); |
1404 | if (IS_ERR(ptr: ce)) { |
1405 | ret = PTR_ERR(ptr: ce); |
1406 | goto out_shadow_ctx; |
1407 | } |
1408 | |
1409 | i915_vm_put(vm: ce->vm); |
1410 | ce->vm = i915_vm_get(vm: &ppgtt->vm); |
1411 | intel_context_set_single_submission(ce); |
1412 | |
1413 | /* Max ring buffer size */ |
1414 | if (!intel_uc_wants_guc_submission(uc: &engine->gt->uc)) |
1415 | ce->ring_size = SZ_2M; |
1416 | |
1417 | s->shadow[i] = ce; |
1418 | } |
1419 | |
1420 | bitmap_zero(dst: s->shadow_ctx_desc_updated, nbits: I915_NUM_ENGINES); |
1421 | |
1422 | s->workloads = kmem_cache_create_usercopy(name: "gvt-g_vgpu_workload" , |
1423 | size: sizeof(struct intel_vgpu_workload), align: 0, |
1424 | SLAB_HWCACHE_ALIGN, |
1425 | offsetof(struct intel_vgpu_workload, rb_tail), |
1426 | sizeof_field(struct intel_vgpu_workload, rb_tail), |
1427 | NULL); |
1428 | |
1429 | if (!s->workloads) { |
1430 | ret = -ENOMEM; |
1431 | goto out_shadow_ctx; |
1432 | } |
1433 | |
1434 | atomic_set(v: &s->running_workload_num, i: 0); |
1435 | bitmap_zero(dst: s->tlb_handle_pending, nbits: I915_NUM_ENGINES); |
1436 | |
1437 | memset(s->last_ctx, 0, sizeof(s->last_ctx)); |
1438 | |
1439 | i915_vm_put(vm: &ppgtt->vm); |
1440 | return 0; |
1441 | |
1442 | out_shadow_ctx: |
1443 | i915_context_ppgtt_root_restore(s, ppgtt); |
1444 | for_each_engine(engine, vgpu->gvt->gt, i) { |
1445 | if (IS_ERR(ptr: s->shadow[i])) |
1446 | break; |
1447 | |
1448 | intel_context_put(ce: s->shadow[i]); |
1449 | } |
1450 | i915_vm_put(vm: &ppgtt->vm); |
1451 | return ret; |
1452 | } |
1453 | |
1454 | /** |
1455 | * intel_vgpu_select_submission_ops - select virtual submission interface |
1456 | * @vgpu: a vGPU |
1457 | * @engine_mask: either ALL_ENGINES or target engine mask |
1458 | * @interface: expected vGPU virtual submission interface |
1459 | * |
1460 | * This function is called when guest configures submission interface. |
1461 | * |
1462 | * Returns: |
1463 | * Zero on success, negative error code if failed. |
1464 | * |
1465 | */ |
1466 | int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu, |
1467 | intel_engine_mask_t engine_mask, |
1468 | unsigned int interface) |
1469 | { |
1470 | struct drm_i915_private *i915 = vgpu->gvt->gt->i915; |
1471 | struct intel_vgpu_submission *s = &vgpu->submission; |
1472 | const struct intel_vgpu_submission_ops *ops[] = { |
1473 | [INTEL_VGPU_EXECLIST_SUBMISSION] = |
1474 | &intel_vgpu_execlist_submission_ops, |
1475 | }; |
1476 | int ret; |
1477 | |
1478 | if (drm_WARN_ON(&i915->drm, interface >= ARRAY_SIZE(ops))) |
1479 | return -EINVAL; |
1480 | |
1481 | if (drm_WARN_ON(&i915->drm, |
1482 | interface == 0 && engine_mask != ALL_ENGINES)) |
1483 | return -EINVAL; |
1484 | |
1485 | if (s->active) |
1486 | s->ops->clean(vgpu, engine_mask); |
1487 | |
1488 | if (interface == 0) { |
1489 | s->ops = NULL; |
1490 | s->virtual_submission_interface = 0; |
1491 | s->active = false; |
1492 | gvt_dbg_core("vgpu%d: remove submission ops\n" , vgpu->id); |
1493 | return 0; |
1494 | } |
1495 | |
1496 | ret = ops[interface]->init(vgpu, engine_mask); |
1497 | if (ret) |
1498 | return ret; |
1499 | |
1500 | s->ops = ops[interface]; |
1501 | s->virtual_submission_interface = interface; |
1502 | s->active = true; |
1503 | |
1504 | gvt_dbg_core("vgpu%d: activate ops [ %s ]\n" , |
1505 | vgpu->id, s->ops->name); |
1506 | |
1507 | return 0; |
1508 | } |
1509 | |
1510 | /** |
1511 | * intel_vgpu_destroy_workload - destroy a vGPU workload |
1512 | * @workload: workload to destroy |
1513 | * |
1514 | * This function is called when destroy a vGPU workload. |
1515 | * |
1516 | */ |
1517 | void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload) |
1518 | { |
1519 | struct intel_vgpu_submission *s = &workload->vgpu->submission; |
1520 | |
1521 | intel_context_unpin(ce: s->shadow[workload->engine->id]); |
1522 | release_shadow_batch_buffer(workload); |
1523 | release_shadow_wa_ctx(wa_ctx: &workload->wa_ctx); |
1524 | |
1525 | if (!list_empty(head: &workload->lri_shadow_mm)) { |
1526 | struct intel_vgpu_mm *m, *mm; |
1527 | list_for_each_entry_safe(m, mm, &workload->lri_shadow_mm, |
1528 | ppgtt_mm.link) { |
1529 | list_del(entry: &m->ppgtt_mm.link); |
1530 | intel_vgpu_mm_put(mm: m); |
1531 | } |
1532 | } |
1533 | |
1534 | GEM_BUG_ON(!list_empty(&workload->lri_shadow_mm)); |
1535 | if (workload->shadow_mm) |
1536 | intel_vgpu_mm_put(mm: workload->shadow_mm); |
1537 | |
1538 | kmem_cache_free(s: s->workloads, objp: workload); |
1539 | } |
1540 | |
1541 | static struct intel_vgpu_workload * |
1542 | alloc_workload(struct intel_vgpu *vgpu) |
1543 | { |
1544 | struct intel_vgpu_submission *s = &vgpu->submission; |
1545 | struct intel_vgpu_workload *workload; |
1546 | |
	workload = kmem_cache_zalloc(s->workloads, GFP_KERNEL);
	if (!workload)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&workload->list);
	INIT_LIST_HEAD(&workload->shadow_bb);
	INIT_LIST_HEAD(&workload->lri_shadow_mm);

	init_waitqueue_head(&workload->shadow_ctx_status_wq);
	atomic_set(&workload->shadow_ctx_active, 0);
1557 | |
1558 | workload->status = -EINPROGRESS; |
1559 | workload->vgpu = vgpu; |
1560 | |
1561 | return workload; |
1562 | } |
1563 | |
1564 | #define RING_CTX_OFF(x) \ |
1565 | offsetof(struct execlist_ring_context, x) |
1566 | |
1567 | static void read_guest_pdps(struct intel_vgpu *vgpu, |
1568 | u64 ring_context_gpa, u32 pdp[8]) |
1569 | { |
1570 | u64 gpa; |
1571 | int i; |
1572 | |
1573 | gpa = ring_context_gpa + RING_CTX_OFF(pdps[0].val); |
1574 | |
1575 | for (i = 0; i < 8; i++) |
		intel_gvt_read_gpa(vgpu,
				   gpa + i * 8, &pdp[7 - i], 4);
1578 | } |
1579 | |
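/*
 * Look up (or create) the shadow PPGTT for the guest page tables referenced
 * by the context descriptor and attach it to the workload.
 */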
1580 | static int prepare_mm(struct intel_vgpu_workload *workload) |
1581 | { |
1582 | struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc; |
1583 | struct intel_vgpu_mm *mm; |
1584 | struct intel_vgpu *vgpu = workload->vgpu; |
1585 | enum intel_gvt_gtt_type root_entry_type; |
1586 | u64 pdps[GVT_RING_CTX_NR_PDPS]; |
1587 | |
1588 | switch (desc->addressing_mode) { |
1589 | case 1: /* legacy 32-bit */ |
1590 | root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY; |
1591 | break; |
1592 | case 3: /* legacy 64-bit */ |
1593 | root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY; |
1594 | break; |
1595 | default: |
1596 | gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n" ); |
1597 | return -EINVAL; |
1598 | } |
1599 | |
	read_guest_pdps(workload->vgpu, workload->ring_context_gpa, (void *)pdps);

	mm = intel_vgpu_get_ppgtt_mm(workload->vgpu, root_entry_type, pdps);
	if (IS_ERR(mm))
		return PTR_ERR(mm);
1605 | |
1606 | workload->shadow_mm = mm; |
1607 | return 0; |
1608 | } |
1609 | |
1610 | #define same_context(a, b) (((a)->context_id == (b)->context_id) && \ |
1611 | ((a)->lrca == (b)->lrca)) |
1612 | |
1613 | /** |
1614 | * intel_vgpu_create_workload - create a vGPU workload |
1615 | * @vgpu: a vGPU |
1616 | * @engine: the engine |
1617 | * @desc: a guest context descriptor |
1618 | * |
1619 | * This function is called when creating a vGPU workload. |
1620 | * |
1621 | * Returns: |
1622 | * struct intel_vgpu_workload * on success, negative error code in |
1623 | * pointer if failed. |
1624 | * |
1625 | */ |
1626 | struct intel_vgpu_workload * |
1627 | intel_vgpu_create_workload(struct intel_vgpu *vgpu, |
1628 | const struct intel_engine_cs *engine, |
1629 | struct execlist_ctx_descriptor_format *desc) |
1630 | { |
1631 | struct intel_vgpu_submission *s = &vgpu->submission; |
1632 | struct list_head *q = workload_q_head(vgpu, engine); |
1633 | struct intel_vgpu_workload *last_workload = NULL; |
1634 | struct intel_vgpu_workload *workload = NULL; |
1635 | u64 ring_context_gpa; |
1636 | u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx; |
1637 | u32 guest_head; |
1638 | int ret; |
1639 | |
1640 | ring_context_gpa = intel_vgpu_gma_to_gpa(mm: vgpu->gtt.ggtt_mm, |
1641 | gma: (u32)((desc->lrca + 1) << I915_GTT_PAGE_SHIFT)); |
1642 | if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) { |
1643 | gvt_vgpu_err("invalid guest context LRCA: %x\n" , desc->lrca); |
1644 | return ERR_PTR(error: -EINVAL); |
1645 | } |
1646 | |
1647 | intel_gvt_read_gpa(vgpu, gpa: ring_context_gpa + |
1648 | RING_CTX_OFF(ring_header.val), buf: &head, len: 4); |
1649 | |
1650 | intel_gvt_read_gpa(vgpu, gpa: ring_context_gpa + |
1651 | RING_CTX_OFF(ring_tail.val), buf: &tail, len: 4); |
1652 | |
1653 | guest_head = head; |
1654 | |
1655 | head &= RB_HEAD_OFF_MASK; |
1656 | tail &= RB_TAIL_OFF_MASK; |
1657 | |
1658 | list_for_each_entry_reverse(last_workload, q, list) { |
1659 | |
1660 | if (same_context(&last_workload->ctx_desc, desc)) { |
1661 | gvt_dbg_el("ring %s cur workload == last\n" , |
1662 | engine->name); |
1663 | gvt_dbg_el("ctx head %x real head %lx\n" , head, |
1664 | last_workload->rb_tail); |
1665 | /* |
1666 | * cannot use guest context head pointer here, |
1667 | * as it might not be updated at this time |
1668 | */ |
1669 | head = last_workload->rb_tail; |
1670 | break; |
1671 | } |
1672 | } |
1673 | |
1674 | gvt_dbg_el("ring %s begin a new workload\n" , engine->name); |
1675 | |
1676 | /* record some ring buffer register values for scan and shadow */ |
1677 | intel_gvt_read_gpa(vgpu, gpa: ring_context_gpa + |
1678 | RING_CTX_OFF(rb_start.val), buf: &start, len: 4); |
1679 | intel_gvt_read_gpa(vgpu, gpa: ring_context_gpa + |
1680 | RING_CTX_OFF(rb_ctrl.val), buf: &ctl, len: 4); |
1681 | intel_gvt_read_gpa(vgpu, gpa: ring_context_gpa + |
1682 | RING_CTX_OFF(ctx_ctrl.val), buf: &ctx_ctl, len: 4); |
1683 | |
1684 | if (!intel_gvt_ggtt_validate_range(vgpu, addr: start, |
1685 | _RING_CTL_BUF_SIZE(ctl))) { |
1686 | gvt_vgpu_err("context contain invalid rb at: 0x%x\n" , start); |
1687 | return ERR_PTR(error: -EINVAL); |
1688 | } |
1689 | |
1690 | workload = alloc_workload(vgpu); |
1691 | if (IS_ERR(ptr: workload)) |
1692 | return workload; |
1693 | |
1694 | workload->engine = engine; |
1695 | workload->ctx_desc = *desc; |
1696 | workload->ring_context_gpa = ring_context_gpa; |
1697 | workload->rb_head = head; |
1698 | workload->guest_rb_head = guest_head; |
1699 | workload->rb_tail = tail; |
1700 | workload->rb_start = start; |
1701 | workload->rb_ctl = ctl; |
1702 | |
1703 | if (engine->id == RCS0) { |
1704 | intel_gvt_read_gpa(vgpu, gpa: ring_context_gpa + |
1705 | RING_CTX_OFF(bb_per_ctx_ptr.val), buf: &per_ctx, len: 4); |
1706 | intel_gvt_read_gpa(vgpu, gpa: ring_context_gpa + |
1707 | RING_CTX_OFF(rcs_indirect_ctx.val), buf: &indirect_ctx, len: 4); |
1708 | |
1709 | workload->wa_ctx.indirect_ctx.guest_gma = |
1710 | indirect_ctx & INDIRECT_CTX_ADDR_MASK; |
1711 | workload->wa_ctx.indirect_ctx.size = |
1712 | (indirect_ctx & INDIRECT_CTX_SIZE_MASK) * |
1713 | CACHELINE_BYTES; |
1714 | |
1715 | if (workload->wa_ctx.indirect_ctx.size != 0) { |
1716 | if (!intel_gvt_ggtt_validate_range(vgpu, |
1717 | addr: workload->wa_ctx.indirect_ctx.guest_gma, |
1718 | size: workload->wa_ctx.indirect_ctx.size)) { |
1719 | gvt_vgpu_err("invalid wa_ctx at: 0x%lx\n" , |
1720 | workload->wa_ctx.indirect_ctx.guest_gma); |
1721 | kmem_cache_free(s: s->workloads, objp: workload); |
1722 | return ERR_PTR(error: -EINVAL); |
1723 | } |
1724 | } |
1725 | |
1726 | workload->wa_ctx.per_ctx.guest_gma = |
1727 | per_ctx & PER_CTX_ADDR_MASK; |
1728 | workload->wa_ctx.per_ctx.valid = per_ctx & 1; |
1729 | if (workload->wa_ctx.per_ctx.valid) { |
1730 | if (!intel_gvt_ggtt_validate_range(vgpu, |
1731 | addr: workload->wa_ctx.per_ctx.guest_gma, |
1732 | CACHELINE_BYTES)) { |
1733 | gvt_vgpu_err("invalid per_ctx at: 0x%lx\n" , |
1734 | workload->wa_ctx.per_ctx.guest_gma); |
1735 | kmem_cache_free(s: s->workloads, objp: workload); |
1736 | return ERR_PTR(error: -EINVAL); |
1737 | } |
1738 | } |
1739 | } |
1740 | |
1741 | gvt_dbg_el("workload %p ring %s head %x tail %x start %x ctl %x\n" , |
1742 | workload, engine->name, head, tail, start, ctl); |
1743 | |
1744 | ret = prepare_mm(workload); |
1745 | if (ret) { |
1746 | kmem_cache_free(s: s->workloads, objp: workload); |
1747 | return ERR_PTR(error: ret); |
1748 | } |
1749 | |
1750 | /* Only scan and shadow the first workload in the queue |
1751 | * as there is only one pre-allocated buf-obj for shadow. |
1752 | */ |
1753 | if (list_empty(head: q)) { |
1754 | intel_wakeref_t wakeref; |
1755 | |
1756 | with_intel_runtime_pm(engine->gt->uncore->rpm, wakeref) |
1757 | ret = intel_gvt_scan_and_shadow_workload(workload); |
1758 | } |
1759 | |
1760 | if (ret) { |
1761 | if (vgpu_is_vm_unhealthy(ret)) |
1762 | enter_failsafe_mode(vgpu, reason: GVT_FAILSAFE_GUEST_ERR); |
1763 | intel_vgpu_destroy_workload(workload); |
1764 | return ERR_PTR(error: ret); |
1765 | } |
1766 | |
1767 | ret = intel_context_pin(ce: s->shadow[engine->id]); |
1768 | if (ret) { |
1769 | intel_vgpu_destroy_workload(workload); |
1770 | return ERR_PTR(error: ret); |
1771 | } |
1772 | |
1773 | return workload; |
1774 | } |
1775 | |
1776 | /** |
 * intel_vgpu_queue_workload - Queue a vGPU workload
1778 | * @workload: the workload to queue in |
1779 | */ |
1780 | void intel_vgpu_queue_workload(struct intel_vgpu_workload *workload) |
1781 | { |
	list_add_tail(&workload->list,
		      workload_q_head(workload->vgpu, workload->engine));
	intel_gvt_kick_schedule(workload->vgpu->gvt);
	wake_up(&workload->vgpu->gvt->scheduler.waitq[workload->engine->id]);
1786 | } |
1787 | |