1 | // SPDX-License-Identifier: MIT |
2 | /* |
3 | * Copyright © 2008-2021 Intel Corporation |
4 | */ |
5 | |
6 | #include <drm/drm_cache.h> |
7 | |
8 | #include "gem/i915_gem_internal.h" |
9 | |
10 | #include "gen2_engine_cs.h" |
11 | #include "gen6_engine_cs.h" |
12 | #include "gen6_ppgtt.h" |
13 | #include "gen7_renderclear.h" |
14 | #include "i915_drv.h" |
15 | #include "i915_irq.h" |
16 | #include "i915_mitigations.h" |
17 | #include "i915_reg.h" |
18 | #include "intel_breadcrumbs.h" |
19 | #include "intel_context.h" |
20 | #include "intel_engine_regs.h" |
21 | #include "intel_gt.h" |
22 | #include "intel_gt_irq.h" |
23 | #include "intel_gt_regs.h" |
24 | #include "intel_reset.h" |
25 | #include "intel_ring.h" |
26 | #include "shmem_utils.h" |
27 | #include "intel_engine_heartbeat.h" |
28 | #include "intel_engine_pm.h" |
29 | |
30 | /* Rough estimate of the typical request size, performing a flush, |
31 | * set-context and then emitting the batch. |
32 | */ |
33 | #define LEGACY_REQUEST_SIZE 200 |
34 | |
35 | static void set_hwstam(struct intel_engine_cs *engine, u32 mask) |
36 | { |
37 | /* |
38 | * Keep the render interrupt unmasked as this papers over |
39 | * lost interrupts following a reset. |
40 | */ |
41 | if (engine->class == RENDER_CLASS) { |
42 | if (GRAPHICS_VER(engine->i915) >= 6) |
43 | mask &= ~BIT(0); |
44 | else |
45 | mask &= ~I915_USER_INTERRUPT; |
46 | } |
47 | |
48 | intel_engine_set_hwsp_writemask(engine, mask); |
49 | } |
50 | |
51 | static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys) |
52 | { |
53 | u32 addr; |
54 | |
55 | addr = lower_32_bits(phys); |
56 | if (GRAPHICS_VER(engine->i915) >= 4) |
57 | addr |= (phys >> 28) & 0xf0; |
58 | |
	intel_uncore_write(engine->uncore, HWS_PGA, addr);
60 | } |
61 | |
62 | static struct page *status_page(struct intel_engine_cs *engine) |
63 | { |
64 | struct drm_i915_gem_object *obj = engine->status_page.vma->obj; |
65 | |
66 | GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); |
	return sg_page(obj->mm.pages->sgl);
68 | } |
69 | |
70 | static void ring_setup_phys_status_page(struct intel_engine_cs *engine) |
71 | { |
72 | set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine)))); |
	set_hwstam(engine, ~0u);
74 | } |
75 | |
76 | static void set_hwsp(struct intel_engine_cs *engine, u32 offset) |
77 | { |
78 | i915_reg_t hwsp; |
79 | |
80 | /* |
81 | * The ring status page addresses are no longer next to the rest of |
82 | * the ring registers as of gen7. |
83 | */ |
84 | if (GRAPHICS_VER(engine->i915) == 7) { |
85 | switch (engine->id) { |
86 | /* |
87 | * No more rings exist on Gen7. Default case is only to shut up |
88 | * gcc switch check warning. |
89 | */ |
90 | default: |
91 | GEM_BUG_ON(engine->id); |
92 | fallthrough; |
93 | case RCS0: |
94 | hwsp = RENDER_HWS_PGA_GEN7; |
95 | break; |
96 | case BCS0: |
97 | hwsp = BLT_HWS_PGA_GEN7; |
98 | break; |
99 | case VCS0: |
100 | hwsp = BSD_HWS_PGA_GEN7; |
101 | break; |
102 | case VECS0: |
103 | hwsp = VEBOX_HWS_PGA_GEN7; |
104 | break; |
105 | } |
106 | } else if (GRAPHICS_VER(engine->i915) == 6) { |
107 | hwsp = RING_HWS_PGA_GEN6(engine->mmio_base); |
108 | } else { |
109 | hwsp = RING_HWS_PGA(engine->mmio_base); |
110 | } |
111 | |
112 | intel_uncore_write_fw(engine->uncore, hwsp, offset); |
113 | intel_uncore_posting_read_fw(engine->uncore, hwsp); |
114 | } |
115 | |
116 | static void flush_cs_tlb(struct intel_engine_cs *engine) |
117 | { |
118 | if (!IS_GRAPHICS_VER(engine->i915, 6, 7)) |
119 | return; |
120 | |
121 | /* ring should be idle before issuing a sync flush*/ |
122 | if ((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0) |
		drm_warn(&engine->i915->drm, "%s not idle before sync flush!\n",
			 engine->name);
125 | |
126 | ENGINE_WRITE_FW(engine, RING_INSTPM, |
127 | _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | |
128 | INSTPM_SYNC_FLUSH)); |
	if (__intel_wait_for_register_fw(engine->uncore,
					 RING_INSTPM(engine->mmio_base),
					 INSTPM_SYNC_FLUSH, 0,
					 2000, 0, NULL))
		ENGINE_TRACE(engine,
			     "wait for SyncFlush to complete for TLB invalidation timed out\n");
135 | } |
136 | |
137 | static void ring_setup_status_page(struct intel_engine_cs *engine) |
138 | { |
	set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
	set_hwstam(engine, ~0u);
141 | |
142 | flush_cs_tlb(engine); |
143 | } |
144 | |
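/*
 * The legacy ring backend runs requests on top of the aliasing ppgtt when
 * one is present, so map the GGTT onto its alias before programming any
 * per-process page-directory state.
 */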
145 | static struct i915_address_space *vm_alias(struct i915_address_space *vm) |
146 | { |
147 | if (i915_is_ggtt(vm)) |
148 | vm = &i915_vm_to_ggtt(vm)->alias->vm; |
149 | |
150 | return vm; |
151 | } |
152 | |
153 | static u32 pp_dir(struct i915_address_space *vm) |
154 | { |
	return to_gen6_ppgtt(i915_vm_to_ppgtt(vm))->pp_dir;
156 | } |
157 | |
158 | static void set_pp_dir(struct intel_engine_cs *engine) |
159 | { |
	struct i915_address_space *vm = vm_alias(engine->gt->vm);
161 | |
162 | if (!vm) |
163 | return; |
164 | |
165 | ENGINE_WRITE_FW(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G); |
166 | ENGINE_WRITE_FW(engine, RING_PP_DIR_BASE, pp_dir(vm)); |
167 | |
168 | if (GRAPHICS_VER(engine->i915) >= 7) { |
169 | ENGINE_WRITE_FW(engine, |
170 | RING_MODE_GEN7, |
171 | _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); |
172 | } |
173 | } |
174 | |
175 | static bool stop_ring(struct intel_engine_cs *engine) |
176 | { |
177 | /* Empty the ring by skipping to the end */ |
178 | ENGINE_WRITE_FW(engine, RING_HEAD, ENGINE_READ_FW(engine, RING_TAIL)); |
179 | ENGINE_POSTING_READ(engine, RING_HEAD); |
180 | |
181 | /* The ring must be empty before it is disabled */ |
182 | ENGINE_WRITE_FW(engine, RING_CTL, 0); |
183 | ENGINE_POSTING_READ(engine, RING_CTL); |
184 | |
185 | /* Then reset the disabled ring */ |
186 | ENGINE_WRITE_FW(engine, RING_HEAD, 0); |
187 | ENGINE_WRITE_FW(engine, RING_TAIL, 0); |
188 | |
189 | return (ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) == 0; |
190 | } |
191 | |
192 | static int xcs_resume(struct intel_engine_cs *engine) |
193 | { |
194 | struct intel_ring *ring = engine->legacy.ring; |
195 | |
	ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
		     ring->head, ring->tail);
198 | |
199 | /* |
200 | * Double check the ring is empty & disabled before we resume. Called |
201 | * from atomic context during PCI probe, so _hardirq(). |
202 | */ |
	intel_synchronize_hardirq(engine->i915);
204 | if (!stop_ring(engine)) |
205 | goto err; |
206 | |
207 | if (HWS_NEEDS_PHYSICAL(engine->i915)) |
208 | ring_setup_phys_status_page(engine); |
209 | else |
210 | ring_setup_status_page(engine); |
211 | |
	intel_breadcrumbs_reset(engine->breadcrumbs);
213 | |
214 | /* Enforce ordering by reading HEAD register back */ |
215 | ENGINE_POSTING_READ(engine, RING_HEAD); |
216 | |
217 | /* |
218 | * Initialize the ring. This must happen _after_ we've cleared the ring |
219 | * registers with the above sequence (the readback of the HEAD registers |
220 | * also enforces ordering), otherwise the hw might lose the new ring |
221 | * register values. |
222 | */ |
223 | ENGINE_WRITE_FW(engine, RING_START, i915_ggtt_offset(ring->vma)); |
224 | |
225 | /* Check that the ring offsets point within the ring! */ |
226 | GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head)); |
227 | GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); |
228 | intel_ring_update_space(ring); |
229 | |
230 | set_pp_dir(engine); |
231 | |
232 | /* First wake the ring up to an empty/idle ring */ |
233 | ENGINE_WRITE_FW(engine, RING_HEAD, ring->head); |
234 | ENGINE_WRITE_FW(engine, RING_TAIL, ring->head); |
235 | ENGINE_POSTING_READ(engine, RING_TAIL); |
236 | |
237 | ENGINE_WRITE_FW(engine, RING_CTL, |
238 | RING_CTL_SIZE(ring->size) | RING_VALID); |
239 | |
240 | /* If the head is still not zero, the ring is dead */ |
	if (__intel_wait_for_register_fw(engine->uncore,
					 RING_CTL(engine->mmio_base),
					 RING_VALID, RING_VALID,
					 5000, 0, NULL))
245 | goto err; |
246 | |
247 | if (GRAPHICS_VER(engine->i915) > 2) |
248 | ENGINE_WRITE_FW(engine, |
249 | RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); |
250 | |
251 | /* Now awake, let it get started */ |
252 | if (ring->tail != ring->head) { |
253 | ENGINE_WRITE_FW(engine, RING_TAIL, ring->tail); |
254 | ENGINE_POSTING_READ(engine, RING_TAIL); |
255 | } |
256 | |
257 | /* Papering over lost _interrupts_ immediately following the restart */ |
258 | intel_engine_signal_breadcrumbs(engine); |
259 | return 0; |
260 | |
261 | err: |
262 | drm_err(&engine->i915->drm, |
263 | "%s initialization failed; " |
264 | "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n" , |
265 | engine->name, |
266 | ENGINE_READ(engine, RING_CTL), |
267 | ENGINE_READ(engine, RING_CTL) & RING_VALID, |
268 | ENGINE_READ(engine, RING_HEAD), ring->head, |
269 | ENGINE_READ(engine, RING_TAIL), ring->tail, |
270 | ENGINE_READ(engine, RING_START), |
271 | i915_ggtt_offset(ring->vma)); |
272 | return -EIO; |
273 | } |
274 | |
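/* Reset the seqno stored in the HWSP for every timeline bound to this engine */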
275 | static void sanitize_hwsp(struct intel_engine_cs *engine) |
276 | { |
277 | struct intel_timeline *tl; |
278 | |
279 | list_for_each_entry(tl, &engine->status_page.timelines, engine_link) |
280 | intel_timeline_reset_seqno(tl); |
281 | } |
282 | |
283 | static void xcs_sanitize(struct intel_engine_cs *engine) |
284 | { |
285 | /* |
286 | * Poison residual state on resume, in case the suspend didn't! |
287 | * |
288 | * We have to assume that across suspend/resume (or other loss |
289 | * of control) that the contents of our pinned buffers has been |
290 | * lost, replaced by garbage. Since this doesn't always happen, |
291 | * let's poison such state so that we more quickly spot when |
292 | * we falsely assume it has been preserved. |
293 | */ |
294 | if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) |
295 | memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); |
296 | |
297 | /* |
298 | * The kernel_context HWSP is stored in the status_page. As above, |
299 | * that may be lost on resume/initialisation, and so we need to |
300 | * reset the value in the HWSP. |
301 | */ |
302 | sanitize_hwsp(engine); |
303 | |
304 | /* And scrub the dirty cachelines for the HWSP */ |
	drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
306 | |
307 | intel_engine_reset_pinned_contexts(engine); |
308 | } |
309 | |
310 | static void reset_prepare(struct intel_engine_cs *engine) |
311 | { |
312 | /* |
	 * We stop the engines, otherwise we might get a failed reset and a
	 * dead gpu (on elk). Also, even a modern gpu such as kbl can suffer
	 * a system hang if a batchbuffer is in progress when the reset is
	 * issued, regardless of the READY_TO_RESET ack.
	 * Thus we assume it is best to stop the engines on all gens
	 * where we have a gpu reset.
319 | * |
320 | * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) |
321 | * |
322 | * WaMediaResetMainRingCleanup:ctg,elk (presumably) |
323 | * WaClearRingBufHeadRegAtInit:ctg,elk |
324 | * |
325 | * FIXME: Wa for more modern gens needs to be validated |
326 | */ |
	ENGINE_TRACE(engine, "\n");
328 | intel_engine_stop_cs(engine); |
329 | |
330 | if (!stop_ring(engine)) { |
331 | /* G45 ring initialization often fails to reset head to zero */ |
332 | ENGINE_TRACE(engine, |
333 | "HEAD not reset to zero, " |
334 | "{ CTL:%08x, HEAD:%08x, TAIL:%08x, START:%08x }\n" , |
335 | ENGINE_READ_FW(engine, RING_CTL), |
336 | ENGINE_READ_FW(engine, RING_HEAD), |
337 | ENGINE_READ_FW(engine, RING_TAIL), |
338 | ENGINE_READ_FW(engine, RING_START)); |
339 | if (!stop_ring(engine)) { |
340 | drm_err(&engine->i915->drm, |
341 | "failed to set %s head to zero " |
342 | "ctl %08x head %08x tail %08x start %08x\n" , |
343 | engine->name, |
344 | ENGINE_READ_FW(engine, RING_CTL), |
345 | ENGINE_READ_FW(engine, RING_HEAD), |
346 | ENGINE_READ_FW(engine, RING_TAIL), |
347 | ENGINE_READ_FW(engine, RING_START)); |
348 | } |
349 | } |
350 | } |
351 | |
352 | static void reset_rewind(struct intel_engine_cs *engine, bool stalled) |
353 | { |
354 | struct i915_request *pos, *rq; |
355 | unsigned long flags; |
356 | u32 head; |
357 | |
358 | rq = NULL; |
359 | spin_lock_irqsave(&engine->sched_engine->lock, flags); |
360 | rcu_read_lock(); |
361 | list_for_each_entry(pos, &engine->sched_engine->requests, sched.link) { |
		if (!__i915_request_is_complete(pos)) {
363 | rq = pos; |
364 | break; |
365 | } |
366 | } |
367 | rcu_read_unlock(); |
368 | |
369 | /* |
370 | * The guilty request will get skipped on a hung engine. |
371 | * |
372 | * Users of client default contexts do not rely on logical |
373 | * state preserved between batches so it is safe to execute |
374 | * queued requests following the hang. Non default contexts |
375 | * rely on preserved state, so skipping a batch loses the |
376 | * evolution of the state and it needs to be considered corrupted. |
377 | * Executing more queued batches on top of corrupted state is |
378 | * risky. But we take the risk by trying to advance through |
379 | * the queued requests in order to make the client behaviour |
	 * more predictable around resets, by not throwing away a random
	 * number of batches it has prepared for execution. Sophisticated
382 | * clients can use gem_reset_stats_ioctl and dma fence status |
383 | * (exported via sync_file info ioctl on explicit fences) to observe |
384 | * when it loses the context state and should rebuild accordingly. |
385 | * |
386 | * The context ban, and ultimately the client ban, mechanism are safety |
387 | * valves if client submission ends up resulting in nothing more than |
388 | * subsequent hangs. |
389 | */ |
390 | |
391 | if (rq) { |
392 | /* |
393 | * Try to restore the logical GPU state to match the |
394 | * continuation of the request queue. If we skip the |
395 | * context/PD restore, then the next request may try to execute |
396 | * assuming that its context is valid and loaded on the GPU and |
397 | * so may try to access invalid memory, prompting repeated GPU |
398 | * hangs. |
399 | * |
400 | * If the request was guilty, we still restore the logical |
401 | * state in case the next request requires it (e.g. the |
402 | * aliasing ppgtt), but skip over the hung batch. |
403 | * |
404 | * If the request was innocent, we try to replay the request |
405 | * with the restored context. |
406 | */ |
		__i915_request_reset(rq, stalled);
408 | |
409 | GEM_BUG_ON(rq->ring != engine->legacy.ring); |
410 | head = rq->head; |
411 | } else { |
412 | head = engine->legacy.ring->tail; |
413 | } |
	engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head);
415 | |
	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
417 | } |
418 | |
419 | static void reset_finish(struct intel_engine_cs *engine) |
420 | { |
421 | } |
422 | |
423 | static void reset_cancel(struct intel_engine_cs *engine) |
424 | { |
425 | struct i915_request *request; |
426 | unsigned long flags; |
427 | |
428 | spin_lock_irqsave(&engine->sched_engine->lock, flags); |
429 | |
430 | /* Mark all submitted requests as skipped. */ |
431 | list_for_each_entry(request, &engine->sched_engine->requests, sched.link) |
		i915_request_put(i915_request_mark_eio(request));
433 | intel_engine_signal_breadcrumbs(engine); |
434 | |
435 | /* Remaining _unready_ requests will be nop'ed when submitted */ |
436 | |
	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
438 | } |
439 | |
440 | static void i9xx_submit_request(struct i915_request *request) |
441 | { |
442 | i915_request_submit(request); |
443 | wmb(); /* paranoid flush writes out of the WCB before mmio */ |
444 | |
445 | ENGINE_WRITE(request->engine, RING_TAIL, |
446 | intel_ring_set_tail(request->ring, request->tail)); |
447 | } |
448 | |
449 | static void __ring_context_fini(struct intel_context *ce) |
450 | { |
	i915_vma_put(ce->state);
452 | } |
453 | |
454 | static void ring_context_destroy(struct kref *ref) |
455 | { |
456 | struct intel_context *ce = container_of(ref, typeof(*ce), ref); |
457 | |
458 | GEM_BUG_ON(intel_context_is_pinned(ce)); |
459 | |
460 | if (ce->state) |
461 | __ring_context_fini(ce); |
462 | |
463 | intel_context_fini(ce); |
464 | intel_context_free(ce); |
465 | } |
466 | |
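/*
 * Populate a freshly allocated context image with the engine's default
 * ("golden") state so that the first context restore loads sane values.
 */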
467 | static int ring_context_init_default_state(struct intel_context *ce, |
468 | struct i915_gem_ww_ctx *ww) |
469 | { |
470 | struct drm_i915_gem_object *obj = ce->state->obj; |
471 | void *vaddr; |
472 | |
	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	shmem_read(ce->engine->default_state, 0,
		   vaddr, ce->engine->context_size);
479 | |
480 | i915_gem_object_flush_map(obj); |
481 | __i915_gem_object_release_map(obj); |
482 | |
483 | __set_bit(CONTEXT_VALID_BIT, &ce->flags); |
484 | return 0; |
485 | } |
486 | |
487 | static int ring_context_pre_pin(struct intel_context *ce, |
488 | struct i915_gem_ww_ctx *ww, |
489 | void **unused) |
490 | { |
491 | struct i915_address_space *vm; |
492 | int err = 0; |
493 | |
494 | if (ce->engine->default_state && |
495 | !test_bit(CONTEXT_VALID_BIT, &ce->flags)) { |
496 | err = ring_context_init_default_state(ce, ww); |
497 | if (err) |
498 | return err; |
499 | } |
500 | |
	vm = vm_alias(ce->vm);
	if (vm)
		err = gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww);
504 | |
505 | return err; |
506 | } |
507 | |
508 | static void __context_unpin_ppgtt(struct intel_context *ce) |
509 | { |
510 | struct i915_address_space *vm; |
511 | |
	vm = vm_alias(ce->vm);
	if (vm)
		gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
515 | } |
516 | |
517 | static void ring_context_unpin(struct intel_context *ce) |
518 | { |
519 | } |
520 | |
521 | static void ring_context_post_unpin(struct intel_context *ce) |
522 | { |
523 | __context_unpin_ppgtt(ce); |
524 | } |
525 | |
526 | static struct i915_vma * |
527 | alloc_context_vma(struct intel_engine_cs *engine) |
528 | { |
529 | struct drm_i915_private *i915 = engine->i915; |
530 | struct drm_i915_gem_object *obj; |
531 | struct i915_vma *vma; |
532 | int err; |
533 | |
	obj = i915_gem_object_create_shmem(i915, engine->context_size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);
537 | |
538 | /* |
539 | * Try to make the context utilize L3 as well as LLC. |
540 | * |
541 | * On VLV we don't have L3 controls in the PTEs so we |
542 | * shouldn't touch the cache level, especially as that |
543 | * would make the object snooped which might have a |
544 | * negative performance impact. |
545 | * |
546 | * Snooping is required on non-llc platforms in execlist |
547 | * mode, but since all GGTT accesses use PAT entry 0 we |
548 | * get snooping anyway regardless of cache_level. |
549 | * |
550 | * This is only applicable for Ivy Bridge devices since |
551 | * later platforms don't have L3 control bits in the PTE. |
552 | */ |
553 | if (IS_IVYBRIDGE(i915)) |
		i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
559 | goto err_obj; |
560 | } |
561 | |
562 | return vma; |
563 | |
564 | err_obj: |
565 | i915_gem_object_put(obj); |
	return ERR_PTR(err);
567 | } |
568 | |
569 | static int ring_context_alloc(struct intel_context *ce) |
570 | { |
571 | struct intel_engine_cs *engine = ce->engine; |
572 | |
573 | /* One ringbuffer to rule them all */ |
574 | GEM_BUG_ON(!engine->legacy.ring); |
575 | ce->ring = engine->legacy.ring; |
	ce->timeline = intel_timeline_get(engine->legacy.timeline);
577 | |
578 | GEM_BUG_ON(ce->state); |
579 | if (engine->context_size) { |
580 | struct i915_vma *vma; |
581 | |
582 | vma = alloc_context_vma(engine); |
		if (IS_ERR(vma))
			return PTR_ERR(vma);
585 | |
586 | ce->state = vma; |
587 | } |
588 | |
589 | return 0; |
590 | } |
591 | |
592 | static int ring_context_pin(struct intel_context *ce, void *unused) |
593 | { |
594 | return 0; |
595 | } |
596 | |
597 | static void ring_context_reset(struct intel_context *ce) |
598 | { |
	intel_ring_reset(ce->ring, ce->ring->emit);
	clear_bit(CONTEXT_VALID_BIT, &ce->flags);
601 | } |
602 | |
603 | static void ring_context_revoke(struct intel_context *ce, |
604 | struct i915_request *rq, |
605 | unsigned int preempt_timeout_ms) |
606 | { |
607 | struct intel_engine_cs *engine; |
608 | |
609 | if (!rq || !i915_request_is_active(rq)) |
610 | return; |
611 | |
612 | engine = rq->engine; |
613 | lockdep_assert_held(&engine->sched_engine->lock); |
614 | list_for_each_entry_continue(rq, &engine->sched_engine->requests, |
615 | sched.link) |
616 | if (rq->context == ce) { |
			i915_request_set_error_once(rq, -EIO);
618 | __i915_request_skip(rq); |
619 | } |
620 | } |
621 | |
622 | static void ring_context_cancel_request(struct intel_context *ce, |
623 | struct i915_request *rq) |
624 | { |
625 | struct intel_engine_cs *engine = NULL; |
626 | |
	i915_request_active_engine(rq, &engine);
628 | |
629 | if (engine && intel_engine_pulse(engine)) |
		intel_gt_handle_error(engine->gt, engine->mask, 0,
				      "request cancellation by %s",
				      current->comm);
633 | } |
634 | |
635 | static const struct intel_context_ops ring_context_ops = { |
636 | .alloc = ring_context_alloc, |
637 | |
638 | .cancel_request = ring_context_cancel_request, |
639 | |
640 | .revoke = ring_context_revoke, |
641 | |
642 | .pre_pin = ring_context_pre_pin, |
643 | .pin = ring_context_pin, |
644 | .unpin = ring_context_unpin, |
645 | .post_unpin = ring_context_post_unpin, |
646 | |
647 | .enter = intel_context_enter_engine, |
648 | .exit = intel_context_exit_engine, |
649 | |
650 | .reset = ring_context_reset, |
651 | .destroy = ring_context_destroy, |
652 | }; |
653 | |
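/*
 * Emit LRI commands to point the ring's PP_DIR_DCLV/PP_DIR_BASE at the new
 * page directory, post the write via a register read back to scratch, and
 * force a TLB invalidate so the load takes effect before the next batch.
 */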
654 | static int load_pd_dir(struct i915_request *rq, |
655 | struct i915_address_space *vm, |
656 | u32 valid) |
657 | { |
658 | const struct intel_engine_cs * const engine = rq->engine; |
659 | u32 *cs; |
660 | |
	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);
664 | |
665 | *cs++ = MI_LOAD_REGISTER_IMM(1); |
666 | *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base)); |
667 | *cs++ = valid; |
668 | |
669 | *cs++ = MI_LOAD_REGISTER_IMM(1); |
670 | *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); |
671 | *cs++ = pp_dir(vm); |
672 | |
673 | /* Stall until the page table load is complete? */ |
674 | *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; |
675 | *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); |
	*cs++ = intel_gt_scratch_offset(engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT);
678 | |
679 | *cs++ = MI_LOAD_REGISTER_IMM(1); |
680 | *cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base)); |
681 | *cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE); |
682 | |
683 | intel_ring_advance(rq, cs); |
684 | |
685 | return rq->engine->emit_flush(rq, EMIT_FLUSH); |
686 | } |
687 | |
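/*
 * Emit MI_SET_CONTEXT for @ce, bracketed by the gen-specific workarounds
 * (arbitration off and PSMI sleep-message disable on gen7, MI_SUSPEND_FLUSH
 * on gen5), and optionally force a reload via a detour through the kernel
 * context.
 */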
688 | static int mi_set_context(struct i915_request *rq, |
689 | struct intel_context *ce, |
690 | u32 flags) |
691 | { |
692 | struct intel_engine_cs *engine = rq->engine; |
693 | struct drm_i915_private *i915 = engine->i915; |
694 | enum intel_engine_id id; |
695 | const int num_engines = |
696 | IS_HASWELL(i915) ? engine->gt->info.num_engines - 1 : 0; |
697 | bool force_restore = false; |
698 | int len; |
699 | u32 *cs; |
700 | |
701 | len = 4; |
702 | if (GRAPHICS_VER(i915) == 7) |
703 | len += 2 + (num_engines ? 4 * num_engines + 6 : 0); |
704 | else if (GRAPHICS_VER(i915) == 5) |
705 | len += 2; |
706 | if (flags & MI_FORCE_RESTORE) { |
707 | GEM_BUG_ON(flags & MI_RESTORE_INHIBIT); |
708 | flags &= ~MI_FORCE_RESTORE; |
709 | force_restore = true; |
710 | len += 2; |
711 | } |
712 | |
	cs = intel_ring_begin(rq, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);
716 | |
717 | /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ |
718 | if (GRAPHICS_VER(i915) == 7) { |
719 | *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; |
720 | if (num_engines) { |
721 | struct intel_engine_cs *signaller; |
722 | |
723 | *cs++ = MI_LOAD_REGISTER_IMM(num_engines); |
724 | for_each_engine(signaller, engine->gt, id) { |
725 | if (signaller == engine) |
726 | continue; |
727 | |
728 | *cs++ = i915_mmio_reg_offset( |
729 | RING_PSMI_CTL(signaller->mmio_base)); |
730 | *cs++ = _MASKED_BIT_ENABLE( |
731 | GEN6_PSMI_SLEEP_MSG_DISABLE); |
732 | } |
733 | } |
734 | } else if (GRAPHICS_VER(i915) == 5) { |
735 | /* |
736 | * This w/a is only listed for pre-production ilk a/b steppings, |
737 | * but is also mentioned for programming the powerctx. To be |
738 | * safe, just apply the workaround; we do not use SyncFlush so |
739 | * this should never take effect and so be a no-op! |
740 | */ |
741 | *cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN; |
742 | } |
743 | |
744 | if (force_restore) { |
745 | /* |
746 | * The HW doesn't handle being told to restore the current |
		 * context very well. Quite often it likes to go off and
		 * sulk, especially when it is meant to be reloading PP_DIR.
749 | * A very simple fix to force the reload is to simply switch |
750 | * away from the current context and back again. |
751 | * |
752 | * Note that the kernel_context will contain random state |
753 | * following the INHIBIT_RESTORE. We accept this since we |
754 | * never use the kernel_context state; it is merely a |
755 | * placeholder we use to flush other contexts. |
756 | */ |
757 | *cs++ = MI_SET_CONTEXT; |
		*cs++ = i915_ggtt_offset(engine->kernel_context->state) |
759 | MI_MM_SPACE_GTT | |
760 | MI_RESTORE_INHIBIT; |
761 | } |
762 | |
763 | *cs++ = MI_NOOP; |
764 | *cs++ = MI_SET_CONTEXT; |
	*cs++ = i915_ggtt_offset(ce->state) | flags;
766 | /* |
767 | * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP |
768 | * WaMiSetContext_Hang:snb,ivb,vlv |
769 | */ |
770 | *cs++ = MI_NOOP; |
771 | |
772 | if (GRAPHICS_VER(i915) == 7) { |
773 | if (num_engines) { |
774 | struct intel_engine_cs *signaller; |
775 | i915_reg_t last_reg = INVALID_MMIO_REG; /* keep gcc quiet */ |
776 | |
777 | *cs++ = MI_LOAD_REGISTER_IMM(num_engines); |
778 | for_each_engine(signaller, engine->gt, id) { |
779 | if (signaller == engine) |
780 | continue; |
781 | |
782 | last_reg = RING_PSMI_CTL(signaller->mmio_base); |
783 | *cs++ = i915_mmio_reg_offset(last_reg); |
784 | *cs++ = _MASKED_BIT_DISABLE( |
785 | GEN6_PSMI_SLEEP_MSG_DISABLE); |
786 | } |
787 | |
788 | /* Insert a delay before the next switch! */ |
789 | *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; |
790 | *cs++ = i915_mmio_reg_offset(last_reg); |
			*cs++ = intel_gt_scratch_offset(engine->gt,
							INTEL_GT_SCRATCH_FIELD_DEFAULT);
793 | *cs++ = MI_NOOP; |
794 | } |
795 | *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; |
796 | } else if (GRAPHICS_VER(i915) == 5) { |
797 | *cs++ = MI_SUSPEND_FLUSH; |
798 | } |
799 | |
800 | intel_ring_advance(rq, cs); |
801 | |
802 | return 0; |
803 | } |
804 | |
805 | static int remap_l3_slice(struct i915_request *rq, int slice) |
806 | { |
807 | #define L3LOG_DW (GEN7_L3LOG_SIZE / sizeof(u32)) |
808 | u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice]; |
809 | int i; |
810 | |
811 | if (!remap_info) |
812 | return 0; |
813 | |
814 | cs = intel_ring_begin(rq, L3LOG_DW * 2 + 2); |
	if (IS_ERR(cs))
		return PTR_ERR(cs);
817 | |
818 | /* |
819 | * Note: We do not worry about the concurrent register cacheline hang |
820 | * here because no other code should access these registers other than |
821 | * at initialization time. |
822 | */ |
823 | *cs++ = MI_LOAD_REGISTER_IMM(L3LOG_DW); |
824 | for (i = 0; i < L3LOG_DW; i++) { |
825 | *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i)); |
826 | *cs++ = remap_info[i]; |
827 | } |
828 | *cs++ = MI_NOOP; |
829 | intel_ring_advance(rq, cs); |
830 | |
831 | return 0; |
832 | #undef L3LOG_DW |
833 | } |
834 | |
835 | static int remap_l3(struct i915_request *rq) |
836 | { |
837 | struct i915_gem_context *ctx = i915_request_gem_context(rq); |
838 | int i, err; |
839 | |
840 | if (!ctx || !ctx->remap_slice) |
841 | return 0; |
842 | |
843 | for (i = 0; i < MAX_L3_SLICES; i++) { |
844 | if (!(ctx->remap_slice & BIT(i))) |
845 | continue; |
846 | |
		err = remap_l3_slice(rq, i);
848 | if (err) |
849 | return err; |
850 | } |
851 | |
852 | ctx->remap_slice = 0; |
853 | return 0; |
854 | } |
855 | |
856 | static int switch_mm(struct i915_request *rq, struct i915_address_space *vm) |
857 | { |
858 | int ret; |
859 | |
860 | if (!vm) |
861 | return 0; |
862 | |
863 | ret = rq->engine->emit_flush(rq, EMIT_FLUSH); |
864 | if (ret) |
865 | return ret; |
866 | |
867 | /* |
868 | * Not only do we need a full barrier (post-sync write) after |
869 | * invalidating the TLBs, but we need to wait a little bit |
870 | * longer. Whether this is merely delaying us, or the |
871 | * subsequent flush is a key part of serialising with the |
872 | * post-sync op, this extra pass appears vital before a |
873 | * mm switch! |
874 | */ |
875 | ret = load_pd_dir(rq, vm, PP_DIR_DCLV_2G); |
876 | if (ret) |
877 | return ret; |
878 | |
879 | return rq->engine->emit_flush(rq, EMIT_INVALIDATE); |
880 | } |
881 | |
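/*
 * Scrub residual execution state by switching to the kernel context and
 * running the gen7 workaround batch before handing the engine over to a
 * new user context.
 */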
882 | static int clear_residuals(struct i915_request *rq) |
883 | { |
884 | struct intel_engine_cs *engine = rq->engine; |
885 | int ret; |
886 | |
	ret = switch_mm(rq, vm_alias(engine->kernel_context->vm));
888 | if (ret) |
889 | return ret; |
890 | |
891 | if (engine->kernel_context->state) { |
		ret = mi_set_context(rq,
				     engine->kernel_context,
				     MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT);
895 | if (ret) |
896 | return ret; |
897 | } |
898 | |
	ret = engine->emit_bb_start(rq,
				    i915_vma_offset(engine->wa_ctx.vma), 0,
				    0);
902 | if (ret) |
903 | return ret; |
904 | |
905 | ret = engine->emit_flush(rq, EMIT_FLUSH); |
906 | if (ret) |
907 | return ret; |
908 | |
909 | /* Always invalidate before the next switch_mm() */ |
910 | return engine->emit_flush(rq, EMIT_INVALIDATE); |
911 | } |
912 | |
913 | static int switch_context(struct i915_request *rq) |
914 | { |
915 | struct intel_engine_cs *engine = rq->engine; |
916 | struct intel_context *ce = rq->context; |
917 | void **residuals = NULL; |
918 | int ret; |
919 | |
920 | GEM_BUG_ON(HAS_EXECLISTS(engine->i915)); |
921 | |
922 | if (engine->wa_ctx.vma && ce != engine->kernel_context) { |
923 | if (engine->wa_ctx.vma->private != ce && |
924 | i915_mitigate_clear_residuals()) { |
925 | ret = clear_residuals(rq); |
926 | if (ret) |
927 | return ret; |
928 | |
929 | residuals = &engine->wa_ctx.vma->private; |
930 | } |
931 | } |
932 | |
	ret = switch_mm(rq, vm_alias(ce->vm));
934 | if (ret) |
935 | return ret; |
936 | |
937 | if (ce->state) { |
938 | u32 flags; |
939 | |
940 | GEM_BUG_ON(engine->id != RCS0); |
941 | |
942 | /* For resource streamer on HSW+ and power context elsewhere */ |
943 | BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN); |
944 | BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN); |
945 | |
946 | flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT; |
947 | if (test_bit(CONTEXT_VALID_BIT, &ce->flags)) |
948 | flags |= MI_RESTORE_EXT_STATE_EN; |
949 | else |
950 | flags |= MI_RESTORE_INHIBIT; |
951 | |
952 | ret = mi_set_context(rq, ce, flags); |
953 | if (ret) |
954 | return ret; |
955 | } |
956 | |
957 | ret = remap_l3(rq); |
958 | if (ret) |
959 | return ret; |
960 | |
961 | /* |
962 | * Now past the point of no return, this request _will_ be emitted. |
963 | * |
964 | * Or at least this preamble will be emitted, the request may be |
965 | * interrupted prior to submitting the user payload. If so, we |
966 | * still submit the "empty" request in order to preserve global |
967 | * state tracking such as this, our tracking of the current |
968 | * dirty context. |
969 | */ |
970 | if (residuals) { |
		intel_context_put(*residuals);
		*residuals = intel_context_get(ce);
973 | } |
974 | |
975 | return 0; |
976 | } |
977 | |
978 | static int ring_request_alloc(struct i915_request *request) |
979 | { |
980 | int ret; |
981 | |
982 | GEM_BUG_ON(!intel_context_is_pinned(request->context)); |
983 | GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb); |
984 | |
985 | /* |
986 | * Flush enough space to reduce the likelihood of waiting after |
987 | * we start building the request - in which case we will just |
988 | * have to repeat work. |
989 | */ |
990 | request->reserved_space += LEGACY_REQUEST_SIZE; |
991 | |
992 | /* Unconditionally invalidate GPU caches and TLBs. */ |
993 | ret = request->engine->emit_flush(request, EMIT_INVALIDATE); |
994 | if (ret) |
995 | return ret; |
996 | |
	ret = switch_context(request);
998 | if (ret) |
999 | return ret; |
1000 | |
1001 | request->reserved_space -= LEGACY_REQUEST_SIZE; |
1002 | return 0; |
1003 | } |
1004 | |
1005 | static void gen6_bsd_submit_request(struct i915_request *request) |
1006 | { |
1007 | struct intel_uncore *uncore = request->engine->uncore; |
1008 | |
	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
1010 | |
1011 | /* Every tail move must follow the sequence below */ |
1012 | |
1013 | /* Disable notification that the ring is IDLE. The GT |
1014 | * will then assume that it is busy and bring it out of rc6. |
1015 | */ |
1016 | intel_uncore_write_fw(uncore, RING_PSMI_CTL(GEN6_BSD_RING_BASE), |
1017 | _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); |
1018 | |
1019 | /* Clear the context id. Here be magic! */ |
1020 | intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0); |
1021 | |
1022 | /* Wait for the ring not to be idle, i.e. for it to wake up. */ |
1023 | if (__intel_wait_for_register_fw(uncore, |
1024 | RING_PSMI_CTL(GEN6_BSD_RING_BASE), |
1025 | GEN6_BSD_SLEEP_INDICATOR, |
					 0,
					 1000, 0, NULL))
		drm_err(&uncore->i915->drm,
			"timed out waiting for the BSD ring to wake up\n");
1030 | |
1031 | /* Now that the ring is fully powered up, update the tail */ |
1032 | i9xx_submit_request(request); |
1033 | |
1034 | /* Let the ring send IDLE messages to the GT again, |
1035 | * and so let it sleep to conserve power when idle. |
1036 | */ |
1037 | intel_uncore_write_fw(uncore, RING_PSMI_CTL(GEN6_BSD_RING_BASE), |
1038 | _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); |
1039 | |
	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
1041 | } |
1042 | |
1043 | static void i9xx_set_default_submission(struct intel_engine_cs *engine) |
1044 | { |
1045 | engine->submit_request = i9xx_submit_request; |
1046 | } |
1047 | |
1048 | static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine) |
1049 | { |
1050 | engine->submit_request = gen6_bsd_submit_request; |
1051 | } |
1052 | |
1053 | static void ring_release(struct intel_engine_cs *engine) |
1054 | { |
1055 | struct drm_i915_private *i915 = engine->i915; |
1056 | |
1057 | drm_WARN_ON(&i915->drm, GRAPHICS_VER(i915) > 2 && |
1058 | (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); |
1059 | |
1060 | intel_engine_cleanup_common(engine); |
1061 | |
1062 | if (engine->wa_ctx.vma) { |
		intel_context_put(engine->wa_ctx.vma->private);
		i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
1065 | } |
1066 | |
	intel_ring_unpin(engine->legacy.ring);
	intel_ring_put(engine->legacy.ring);

	intel_timeline_unpin(engine->legacy.timeline);
	intel_timeline_put(engine->legacy.timeline);
1072 | } |
1073 | |
1074 | static void irq_handler(struct intel_engine_cs *engine, u16 iir) |
1075 | { |
1076 | intel_engine_signal_breadcrumbs(engine); |
1077 | } |
1078 | |
1079 | static void setup_irq(struct intel_engine_cs *engine) |
1080 | { |
1081 | struct drm_i915_private *i915 = engine->i915; |
1082 | |
	intel_engine_set_irq_handler(engine, irq_handler);
1084 | |
1085 | if (GRAPHICS_VER(i915) >= 6) { |
1086 | engine->irq_enable = gen6_irq_enable; |
1087 | engine->irq_disable = gen6_irq_disable; |
1088 | } else if (GRAPHICS_VER(i915) >= 5) { |
1089 | engine->irq_enable = gen5_irq_enable; |
1090 | engine->irq_disable = gen5_irq_disable; |
1091 | } else if (GRAPHICS_VER(i915) >= 3) { |
1092 | engine->irq_enable = gen3_irq_enable; |
1093 | engine->irq_disable = gen3_irq_disable; |
1094 | } else { |
1095 | engine->irq_enable = gen2_irq_enable; |
1096 | engine->irq_disable = gen2_irq_disable; |
1097 | } |
1098 | } |
1099 | |
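/* Requests are tracked in submission order for reset rewind/cancel handling */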
1100 | static void add_to_engine(struct i915_request *rq) |
1101 | { |
1102 | lockdep_assert_held(&rq->engine->sched_engine->lock); |
	list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests);
1104 | } |
1105 | |
1106 | static void remove_from_engine(struct i915_request *rq) |
1107 | { |
	spin_lock_irq(&rq->engine->sched_engine->lock);
	list_del_init(&rq->sched.link);
1110 | |
1111 | /* Prevent further __await_execution() registering a cb, then flush */ |
	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
1113 | |
	spin_unlock_irq(&rq->engine->sched_engine->lock);
1115 | |
1116 | i915_request_notify_execute_cb_imm(rq); |
1117 | } |
1118 | |
1119 | static void setup_common(struct intel_engine_cs *engine) |
1120 | { |
1121 | struct drm_i915_private *i915 = engine->i915; |
1122 | |
1123 | /* gen8+ are only supported with execlists */ |
1124 | GEM_BUG_ON(GRAPHICS_VER(i915) >= 8); |
1125 | |
1126 | setup_irq(engine); |
1127 | |
1128 | engine->resume = xcs_resume; |
1129 | engine->sanitize = xcs_sanitize; |
1130 | |
1131 | engine->reset.prepare = reset_prepare; |
1132 | engine->reset.rewind = reset_rewind; |
1133 | engine->reset.cancel = reset_cancel; |
1134 | engine->reset.finish = reset_finish; |
1135 | |
1136 | engine->add_active_request = add_to_engine; |
1137 | engine->remove_active_request = remove_from_engine; |
1138 | |
1139 | engine->cops = &ring_context_ops; |
1140 | engine->request_alloc = ring_request_alloc; |
1141 | |
1142 | /* |
1143 | * Using a global execution timeline; the previous final breadcrumb is |
	 * equivalent to our next initial breadcrumb so we can elide
1145 | * engine->emit_init_breadcrumb(). |
1146 | */ |
1147 | engine->emit_fini_breadcrumb = gen3_emit_breadcrumb; |
1148 | if (GRAPHICS_VER(i915) == 5) |
1149 | engine->emit_fini_breadcrumb = gen5_emit_breadcrumb; |
1150 | |
1151 | engine->set_default_submission = i9xx_set_default_submission; |
1152 | |
1153 | if (GRAPHICS_VER(i915) >= 6) |
1154 | engine->emit_bb_start = gen6_emit_bb_start; |
1155 | else if (GRAPHICS_VER(i915) >= 4) |
1156 | engine->emit_bb_start = gen4_emit_bb_start; |
1157 | else if (IS_I830(i915) || IS_I845G(i915)) |
1158 | engine->emit_bb_start = i830_emit_bb_start; |
1159 | else |
1160 | engine->emit_bb_start = gen3_emit_bb_start; |
1161 | } |
1162 | |
1163 | static void setup_rcs(struct intel_engine_cs *engine) |
1164 | { |
1165 | struct drm_i915_private *i915 = engine->i915; |
1166 | |
1167 | if (HAS_L3_DPF(i915)) |
1168 | engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT; |
1169 | |
1170 | engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; |
1171 | |
1172 | if (GRAPHICS_VER(i915) >= 7) { |
1173 | engine->emit_flush = gen7_emit_flush_rcs; |
1174 | engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_rcs; |
1175 | } else if (GRAPHICS_VER(i915) == 6) { |
1176 | engine->emit_flush = gen6_emit_flush_rcs; |
1177 | engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_rcs; |
1178 | } else if (GRAPHICS_VER(i915) == 5) { |
1179 | engine->emit_flush = gen4_emit_flush_rcs; |
1180 | } else { |
1181 | if (GRAPHICS_VER(i915) < 4) |
1182 | engine->emit_flush = gen2_emit_flush; |
1183 | else |
1184 | engine->emit_flush = gen4_emit_flush_rcs; |
1185 | engine->irq_enable_mask = I915_USER_INTERRUPT; |
1186 | } |
1187 | |
1188 | if (IS_HASWELL(i915)) |
1189 | engine->emit_bb_start = hsw_emit_bb_start; |
1190 | } |
1191 | |
1192 | static void setup_vcs(struct intel_engine_cs *engine) |
1193 | { |
1194 | struct drm_i915_private *i915 = engine->i915; |
1195 | |
1196 | if (GRAPHICS_VER(i915) >= 6) { |
1197 | /* gen6 bsd needs a special wa for tail updates */ |
1198 | if (GRAPHICS_VER(i915) == 6) |
1199 | engine->set_default_submission = gen6_bsd_set_default_submission; |
1200 | engine->emit_flush = gen6_emit_flush_vcs; |
1201 | engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; |
1202 | |
1203 | if (GRAPHICS_VER(i915) == 6) |
1204 | engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs; |
1205 | else |
1206 | engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs; |
1207 | } else { |
1208 | engine->emit_flush = gen4_emit_flush_vcs; |
1209 | if (GRAPHICS_VER(i915) == 5) |
1210 | engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; |
1211 | else |
1212 | engine->irq_enable_mask = I915_BSD_USER_INTERRUPT; |
1213 | } |
1214 | } |
1215 | |
1216 | static void setup_bcs(struct intel_engine_cs *engine) |
1217 | { |
1218 | struct drm_i915_private *i915 = engine->i915; |
1219 | |
1220 | engine->emit_flush = gen6_emit_flush_xcs; |
1221 | engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; |
1222 | |
1223 | if (GRAPHICS_VER(i915) == 6) |
1224 | engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs; |
1225 | else |
1226 | engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs; |
1227 | } |
1228 | |
1229 | static void setup_vecs(struct intel_engine_cs *engine) |
1230 | { |
1231 | struct drm_i915_private *i915 = engine->i915; |
1232 | |
1233 | GEM_BUG_ON(GRAPHICS_VER(i915) < 7); |
1234 | |
1235 | engine->emit_flush = gen6_emit_flush_xcs; |
1236 | engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; |
1237 | engine->irq_enable = hsw_irq_enable_vecs; |
1238 | engine->irq_disable = hsw_irq_disable_vecs; |
1239 | |
1240 | engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs; |
1241 | } |
1242 | |
1243 | static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine, |
1244 | struct i915_vma * const vma) |
1245 | { |
1246 | return gen7_setup_clear_gpr_bb(engine, vma); |
1247 | } |
1248 | |
1249 | static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine, |
1250 | struct i915_gem_ww_ctx *ww, |
1251 | struct i915_vma *vma) |
1252 | { |
1253 | int err; |
1254 | |
	err = i915_vma_pin_ww(vma, ww, 0, 0, PIN_USER | PIN_HIGH);
1256 | if (err) |
1257 | return err; |
1258 | |
1259 | err = i915_vma_sync(vma); |
1260 | if (err) |
1261 | goto err_unpin; |
1262 | |
1263 | err = gen7_ctx_switch_bb_setup(engine, vma); |
1264 | if (err) |
1265 | goto err_unpin; |
1266 | |
1267 | engine->wa_ctx.vma = vma; |
1268 | return 0; |
1269 | |
1270 | err_unpin: |
1271 | i915_vma_unpin(vma); |
1272 | return err; |
1273 | } |
1274 | |
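/*
 * Allocate the buffer object for the gen7 context-switch workaround batch.
 * The required size is probed by calling gen7_ctx_switch_bb_setup() with a
 * NULL vma; a zero result means the workaround is not needed on this engine.
 */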
1275 | static struct i915_vma *gen7_ctx_vma(struct intel_engine_cs *engine) |
1276 | { |
1277 | struct drm_i915_gem_object *obj; |
1278 | struct i915_vma *vma; |
1279 | int size, err; |
1280 | |
1281 | if (GRAPHICS_VER(engine->i915) != 7 || engine->class != RENDER_CLASS) |
1282 | return NULL; |
1283 | |
1284 | err = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */); |
1285 | if (err < 0) |
		return ERR_PTR(err);
1287 | if (!err) |
1288 | return NULL; |
1289 | |
1290 | size = ALIGN(err, PAGE_SIZE); |
1291 | |
	obj = i915_gem_object_create_internal(engine->i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);
1295 | |
	vma = i915_vma_instance(obj, engine->gt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return ERR_CAST(vma);
1300 | } |
1301 | |
1302 | vma->private = intel_context_create(engine); /* dummy residuals */ |
	if (IS_ERR(vma->private)) {
		err = PTR_ERR(vma->private);
		vma->private = NULL;
		i915_gem_object_put(obj);
		return ERR_PTR(err);
1308 | } |
1309 | |
1310 | return vma; |
1311 | } |
1312 | |
1313 | int intel_ring_submission_setup(struct intel_engine_cs *engine) |
1314 | { |
1315 | struct i915_gem_ww_ctx ww; |
1316 | struct intel_timeline *timeline; |
1317 | struct intel_ring *ring; |
1318 | struct i915_vma *gen7_wa_vma; |
1319 | int err; |
1320 | |
1321 | setup_common(engine); |
1322 | |
1323 | switch (engine->class) { |
1324 | case RENDER_CLASS: |
1325 | setup_rcs(engine); |
1326 | break; |
1327 | case VIDEO_DECODE_CLASS: |
1328 | setup_vcs(engine); |
1329 | break; |
1330 | case COPY_ENGINE_CLASS: |
1331 | setup_bcs(engine); |
1332 | break; |
1333 | case VIDEO_ENHANCEMENT_CLASS: |
1334 | setup_vecs(engine); |
1335 | break; |
1336 | default: |
1337 | MISSING_CASE(engine->class); |
1338 | return -ENODEV; |
1339 | } |
1340 | |
1341 | timeline = intel_timeline_create_from_engine(engine, |
1342 | I915_GEM_HWS_SEQNO_ADDR); |
	if (IS_ERR(timeline)) {
		err = PTR_ERR(timeline);
1345 | goto err; |
1346 | } |
1347 | GEM_BUG_ON(timeline->has_initial_breadcrumb); |
1348 | |
1349 | ring = intel_engine_create_ring(engine, SZ_16K); |
	if (IS_ERR(ring)) {
		err = PTR_ERR(ring);
1352 | goto err_timeline; |
1353 | } |
1354 | |
1355 | GEM_BUG_ON(engine->legacy.ring); |
1356 | engine->legacy.ring = ring; |
1357 | engine->legacy.timeline = timeline; |
1358 | |
1359 | gen7_wa_vma = gen7_ctx_vma(engine); |
	if (IS_ERR(gen7_wa_vma)) {
		err = PTR_ERR(gen7_wa_vma);
1362 | goto err_ring; |
1363 | } |
1364 | |
	i915_gem_ww_ctx_init(&ww, false);
1366 | |
1367 | retry: |
	err = i915_gem_object_lock(timeline->hwsp_ggtt->obj, &ww);
	if (!err && gen7_wa_vma)
		err = i915_gem_object_lock(gen7_wa_vma->obj, &ww);
	if (!err)
		err = i915_gem_object_lock(engine->legacy.ring->vma->obj, &ww);
	if (!err)
		err = intel_timeline_pin(timeline, &ww);
	if (!err) {
		err = intel_ring_pin(ring, &ww);
		if (err)
			intel_timeline_unpin(timeline);
1379 | } |
1380 | if (err) |
1381 | goto out; |
1382 | |
1383 | GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); |
1384 | |
1385 | if (gen7_wa_vma) { |
		err = gen7_ctx_switch_bb_init(engine, &ww, gen7_wa_vma);
		if (err) {
			intel_ring_unpin(ring);
			intel_timeline_unpin(timeline);
1390 | } |
1391 | } |
1392 | |
1393 | out: |
1394 | if (err == -EDEADLK) { |
		err = i915_gem_ww_ctx_backoff(&ww);
1396 | if (!err) |
1397 | goto retry; |
1398 | } |
	i915_gem_ww_ctx_fini(&ww);
1400 | if (err) |
1401 | goto err_gen7_put; |
1402 | |
1403 | /* Finally, take ownership and responsibility for cleanup! */ |
1404 | engine->release = ring_release; |
1405 | |
1406 | return 0; |
1407 | |
1408 | err_gen7_put: |
1409 | if (gen7_wa_vma) { |
		intel_context_put(gen7_wa_vma->private);
		i915_gem_object_put(gen7_wa_vma->obj);
1412 | } |
1413 | err_ring: |
1414 | intel_ring_put(ring); |
1415 | err_timeline: |
1416 | intel_timeline_put(timeline); |
1417 | err: |
1418 | intel_engine_cleanup_common(engine); |
1419 | return err; |
1420 | } |
1421 | |
1422 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
1423 | #include "selftest_ring_submission.c" |
1424 | #endif |
1425 | |