// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_pm.h"

#include "i915_drm_client.h"
#include "i915_drv.h"
#include "i915_trace.h"

#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
#include "intel_ring.h"

static struct kmem_cache *slab_ce;

static struct intel_context *intel_context_alloc(void)
{
	return kmem_cache_zalloc(slab_ce, GFP_KERNEL);
}

static void rcu_context_free(struct rcu_head *rcu)
{
	struct intel_context *ce = container_of(rcu, typeof(*ce), rcu);

	trace_intel_context_free(ce);
	kmem_cache_free(slab_ce, ce);
}

void intel_context_free(struct intel_context *ce)
{
	call_rcu(&ce->rcu, rcu_context_free);
}

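/*
 * Allocate and initialise a new intel_context for @engine, or return
 * ERR_PTR(-ENOMEM) if the allocation fails.
 */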
struct intel_context *
intel_context_create(struct intel_engine_cs *engine)
{
	struct intel_context *ce;

	ce = intel_context_alloc();
	if (!ce)
		return ERR_PTR(-ENOMEM);

	intel_context_init(ce, engine);
	trace_intel_context_create(ce);
	return ce;
}

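/*
 * Lazily allocate the backing state for @ce via ce->ops->alloc(), guarded by
 * ce->pin_mutex so the allocation happens at most once. Returns -EINTR if
 * interrupted while waiting for the lock, or -EIO if the context has already
 * been banned.
 */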
int intel_context_alloc_state(struct intel_context *ce)
{
	struct i915_gem_context *ctx;
	int err = 0;

	if (mutex_lock_interruptible(&ce->pin_mutex))
		return -EINTR;

	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
		if (intel_context_is_banned(ce)) {
			err = -EIO;
			goto unlock;
		}

		err = ce->ops->alloc(ce);
		if (unlikely(err))
			goto unlock;

		set_bit(CONTEXT_ALLOC_BIT, &ce->flags);

		rcu_read_lock();
		ctx = rcu_dereference(ce->gem_context);
		if (ctx && !kref_get_unless_zero(&ctx->ref))
			ctx = NULL;
		rcu_read_unlock();
		if (ctx) {
			if (ctx->client)
				i915_drm_client_add_context_objects(ctx->client,
								    ce);
			i915_gem_context_put(ctx);
		}
	}

unlock:
	mutex_unlock(&ce->pin_mutex);
	return err;
}

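/*
 * Acquire the context's active tracker and, except for barrier, GuC-submitted
 * and parallel contexts, preallocate the engine-pm barrier nodes; if the
 * preallocation fails, the acquire is dropped again.
 */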
static int intel_context_active_acquire(struct intel_context *ce)
{
	int err;

	__i915_active_acquire(&ce->active);

	if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) ||
	    intel_context_is_parallel(ce))
		return 0;

	/* Preallocate tracking nodes */
	err = i915_active_acquire_preallocate_barrier(&ce->active,
						      ce->engine);
	if (err)
		i915_active_release(&ce->active);

	return err;
}

static void intel_context_active_release(struct intel_context *ce)
{
	/* Nodes preallocated in intel_context_active() */
	i915_active_acquire_barrier(&ce->active);
	i915_active_release(&ce->active);
}

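/*
 * Pin the context state vma into the GGTT above the pin bias, acquire its
 * active tracker and mark the object unshrinkable (and dirty) for as long as
 * it is pinned.
 */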
static int __context_pin_state(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
{
	unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
	int err;

	err = i915_ggtt_pin(vma, ww, 0, bias | PIN_HIGH);
	if (err)
		return err;

	err = i915_active_acquire(&vma->active);
	if (err)
		goto err_unpin;

	/*
	 * And mark it as a globally pinned object to let the shrinker know
	 * it cannot reclaim the object until we release it.
	 */
	i915_vma_make_unshrinkable(vma);
	vma->obj->mm.dirty = true;

	return 0;

err_unpin:
	i915_vma_unpin(vma);
	return err;
}

static void __context_unpin_state(struct i915_vma *vma)
{
	i915_vma_make_shrinkable(vma);
	i915_active_release(&vma->active);
	__i915_vma_unpin(vma);
}

static int __ring_active(struct intel_ring *ring,
			 struct i915_gem_ww_ctx *ww)
{
	int err;

	err = intel_ring_pin(ring, ww);
	if (err)
		return err;

	err = i915_active_acquire(&ring->vma->active);
	if (err)
		goto err_pin;

	return 0;

err_pin:
	intel_ring_unpin(ring);
	return err;
}

static void __ring_retire(struct intel_ring *ring)
{
	i915_active_release(&ring->vma->active);
	intel_ring_unpin(ring);
}

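/*
 * Pin the ring, timeline and (if present) context state backing store. The
 * caller must already hold the ww locks on the relevant objects; on error
 * everything pinned so far is unwound.
 */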
static int intel_context_pre_pin(struct intel_context *ce,
				 struct i915_gem_ww_ctx *ww)
{
	int err;

	CE_TRACE(ce, "active\n");

	err = __ring_active(ce->ring, ww);
	if (err)
		return err;

	err = intel_timeline_pin(ce->timeline, ww);
	if (err)
		goto err_ring;

	if (!ce->state)
		return 0;

	err = __context_pin_state(ce->state, ww);
	if (err)
		goto err_timeline;

	return 0;

err_timeline:
	intel_timeline_unpin(ce->timeline);
err_ring:
	__ring_retire(ce->ring);
	return err;
}

static void intel_context_post_unpin(struct intel_context *ce)
{
	if (ce->state)
		__context_unpin_state(ce->state);

	intel_timeline_unpin(ce->timeline);
	__ring_retire(ce->ring);
}

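/*
 * Pin a context for active use under the caller's ww acquire context: lock
 * the backing objects, pre-pin the ring/timeline/state and, if this is the
 * first pin, set up the HW state via ce->ops->pin(). Only the shared
 * hwsp_ggtt object is unlocked before returning; the remaining ww locks are
 * handed back to the caller.
 */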
int __intel_context_do_pin_ww(struct intel_context *ce,
			      struct i915_gem_ww_ctx *ww)
{
	bool handoff = false;
	void *vaddr;
	int err = 0;

	if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
		err = intel_context_alloc_state(ce);
		if (err)
			return err;
	}

	/*
	 * We always pin the context/ring/timeline here, to ensure a pin
	 * refcount for __intel_context_active(), which prevents a lock
	 * inversion of ce->pin_mutex vs dma_resv_lock().
	 */

	err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
	if (!err)
		err = i915_gem_object_lock(ce->ring->vma->obj, ww);
	if (!err && ce->state)
		err = i915_gem_object_lock(ce->state->obj, ww);
	if (!err)
		err = intel_context_pre_pin(ce, ww);
	if (err)
		return err;

	err = ce->ops->pre_pin(ce, ww, &vaddr);
	if (err)
		goto err_ctx_unpin;

	err = i915_active_acquire(&ce->active);
	if (err)
		goto err_post_unpin;

	err = mutex_lock_interruptible(&ce->pin_mutex);
	if (err)
		goto err_release;

	intel_engine_pm_might_get(ce->engine);

	if (unlikely(intel_context_is_closed(ce))) {
		err = -ENOENT;
		goto err_unlock;
	}

	if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
		err = intel_context_active_acquire(ce);
		if (unlikely(err))
			goto err_unlock;

		err = ce->ops->pin(ce, vaddr);
		if (err) {
			intel_context_active_release(ce);
			goto err_unlock;
		}

		CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
			 i915_ggtt_offset(ce->ring->vma),
			 ce->ring->head, ce->ring->tail);

		handoff = true;
		smp_mb__before_atomic(); /* flush pin before it is visible */
		atomic_inc(&ce->pin_count);
	}

	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */

	trace_intel_context_do_pin(ce);

err_unlock:
	mutex_unlock(&ce->pin_mutex);
err_release:
	i915_active_release(&ce->active);
err_post_unpin:
	if (!handoff)
		ce->ops->post_unpin(ce);
err_ctx_unpin:
	intel_context_post_unpin(ce);

	/*
	 * Unlock the hwsp_ggtt object since it's shared.
	 * In principle we can unlock all the global state locked above
	 * since it's pinned and doesn't need fencing, and will
	 * thus remain resident until it is explicitly unpinned.
	 */
	i915_gem_ww_unlock_single(ce->timeline->hwsp_ggtt->obj);

	return err;
}

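/*
 * As __intel_context_do_pin_ww(), but with a local ww context and -EDEADLK
 * backoff.
 */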
int __intel_context_do_pin(struct intel_context *ce)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = __intel_context_do_pin_ww(ce, &ww);
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}

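/*
 * Drop @sub pin references from @ce; when the pin count reaches zero, unpin
 * the HW state and release the active tracker. The final retirement may then
 * complete asynchronously.
 */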
void __intel_context_do_unpin(struct intel_context *ce, int sub)
{
	if (!atomic_sub_and_test(sub, &ce->pin_count))
		return;

	CE_TRACE(ce, "unpin\n");
	ce->ops->unpin(ce);
	ce->ops->post_unpin(ce);

	/*
	 * Once released, we may asynchronously drop the active reference.
	 * As that may be the only reference keeping the context alive,
	 * take an extra reference now so that it is not freed before we
	 * finish dereferencing it.
	 */
	intel_context_get(ce);
	intel_context_active_release(ce);
	trace_intel_context_do_unpin(ce);
	intel_context_put(ce);
}

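/*
 * i915_active callbacks for ce->active: __intel_context_active() takes the
 * extra pins while the context has requests in flight, and
 * __intel_context_retire() drops them again once the last request has been
 * retired.
 */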
static void __intel_context_retire(struct i915_active *active)
{
	struct intel_context *ce = container_of(active, typeof(*ce), active);

	CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n",
		 intel_context_get_total_runtime_ns(ce),
		 intel_context_get_avg_runtime_ns(ce));

	set_bit(CONTEXT_VALID_BIT, &ce->flags);
	intel_context_post_unpin(ce);
	intel_context_put(ce);
}

static int __intel_context_active(struct i915_active *active)
{
	struct intel_context *ce = container_of(active, typeof(*ce), active);

	intel_context_get(ce);

	/* everything should already be activated by intel_context_pre_pin() */
	GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->ring->vma->active));
	__intel_ring_pin(ce->ring);

	__intel_timeline_pin(ce->timeline);

	if (ce->state) {
		GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->state->active));
		__i915_vma_pin(ce->state);
		i915_vma_make_unshrinkable(ce->state);
	}

	return 0;
}

static int
sw_fence_dummy_notify(struct i915_sw_fence *sf,
		      enum i915_sw_fence_notify state)
{
	return NOTIFY_DONE;
}

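/*
 * One-time initialisation of @ce for @engine: take a reference on the
 * engine's address space and set up the locks, lists, GuC state and the
 * active tracker callbacks. The ring and timeline are not created here but
 * later, when the context state is first allocated (see
 * intel_context_alloc_state()).
 */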
void
intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
{
	GEM_BUG_ON(!engine->cops);
	GEM_BUG_ON(!engine->gt->vm);

	kref_init(&ce->ref);

	ce->engine = engine;
	ce->ops = engine->cops;
	ce->sseu = engine->sseu;
	ce->ring = NULL;
	ce->ring_size = SZ_4K;

	ewma_runtime_init(&ce->stats.runtime.avg);

	ce->vm = i915_vm_get(engine->gt->vm);

	/* NB ce->signal_link/lock is used under RCU */
	spin_lock_init(&ce->signal_lock);
	INIT_LIST_HEAD(&ce->signals);

	mutex_init(&ce->pin_mutex);

	spin_lock_init(&ce->guc_state.lock);
	INIT_LIST_HEAD(&ce->guc_state.fences);
	INIT_LIST_HEAD(&ce->guc_state.requests);

	ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
	INIT_LIST_HEAD(&ce->guc_id.link);

	INIT_LIST_HEAD(&ce->destroyed_link);

	INIT_LIST_HEAD(&ce->parallel.child_list);

	/*
	 * Initialize fence to be complete as this is expected to be complete
	 * unless there is a pending schedule disable outstanding.
	 */
	i915_sw_fence_init(&ce->guc_state.blocked,
			   sw_fence_dummy_notify);
	i915_sw_fence_commit(&ce->guc_state.blocked);

	i915_active_init(&ce->active,
			 __intel_context_active, __intel_context_retire, 0);
}

void intel_context_fini(struct intel_context *ce)
{
	struct intel_context *child, *next;

	if (ce->timeline)
		intel_timeline_put(ce->timeline);
	i915_vm_put(ce->vm);

	/* Need to put the creation ref for the children */
	if (intel_context_is_parent(ce))
		for_each_child_safe(ce, child, next)
			intel_context_put(child);

	mutex_destroy(&ce->pin_mutex);
	i915_active_fini(&ce->active);
	i915_sw_fence_fini(&ce->guc_state.blocked);
}

void i915_context_module_exit(void)
{
	kmem_cache_destroy(slab_ce);
}

int __init i915_context_module_init(void)
{
	slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN);
	if (!slab_ce)
		return -ENOMEM;

	return 0;
}

void intel_context_enter_engine(struct intel_context *ce)
{
	intel_engine_pm_get(ce->engine);
	intel_timeline_enter(ce->timeline);
}

void intel_context_exit_engine(struct intel_context *ce)
{
	intel_timeline_exit(ce->timeline);
	intel_engine_pm_put(ce->engine);
}

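/*
 * Order @rq, a request from another context, after the current activity on
 * @ce and keep @ce's state pinned until that request has been retired.
 */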
int intel_context_prepare_remote_request(struct intel_context *ce,
					  struct i915_request *rq)
{
	struct intel_timeline *tl = ce->timeline;
	int err;

	/* Only suitable for use in remotely modifying this context */
	GEM_BUG_ON(rq->context == ce);

	if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
		/* Queue this switch after current activity by this context. */
		err = i915_active_fence_set(&tl->last_request, rq);
		if (err)
			return err;
	}

	/*
	 * Guarantee that the context image and the timeline remain pinned
	 * until the modifying request is retired, by adding it to the ce
	 * activity tracker.
	 *
	 * We only need to take one pin on their account; in other words,
	 * transfer the pinned ce object to the tracked active request.
	 */
	GEM_BUG_ON(i915_active_is_idle(&ce->active));
	return i915_active_add_request(&ce->active, rq);
}

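/*
 * Pin @ce (with ww locking and -EDEADLK backoff) just long enough to create
 * a new request on it, then drop the temporary pin and fix up the lockdep
 * annotations on timeline->mutex expected by the caller.
 */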
struct i915_request *intel_context_create_request(struct intel_context *ce)
{
	struct i915_gem_ww_ctx ww;
	struct i915_request *rq;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = intel_context_pin_ww(ce, &ww);
	if (!err) {
		rq = i915_request_create(ce);
		intel_context_unpin(ce);
	} else if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
		rq = ERR_PTR(err);
	} else {
		rq = ERR_PTR(err);
	}

	i915_gem_ww_ctx_fini(&ww);

	if (IS_ERR(rq))
		return rq;

	/*
	 * timeline->mutex should be the inner lock, but is used as outer lock.
	 * Hack around this to shut up lockdep in selftests..
	 */
	lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie);
	mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_);
	mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
	rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);

	return rq;
}

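/*
 * Walk the parent context's guc_state request list backwards to find the
 * oldest request submitted on @ce that has not yet completed (i.e. the one
 * presumed to be executing) and return it with a reference held. Only valid
 * for GuC submission.
 */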
struct i915_request *intel_context_get_active_request(struct intel_context *ce)
{
	struct intel_context *parent = intel_context_to_parent(ce);
	struct i915_request *rq, *active = NULL;
	unsigned long flags;

	GEM_BUG_ON(!intel_engine_uses_guc(ce->engine));

	/*
	 * We search the parent list to find an active request on the submitted
	 * context. The parent list contains the requests for all the contexts
	 * in the relationship, so we have to compare each request's context.
	 */
	spin_lock_irqsave(&parent->guc_state.lock, flags);
	list_for_each_entry_reverse(rq, &parent->guc_state.requests,
				    sched.link) {
		if (rq->context != ce)
			continue;
		if (i915_request_completed(rq))
			break;

		active = rq;
	}
	if (active)
		active = i915_request_get_rcu(active);
	spin_unlock_irqrestore(&parent->guc_state.lock, flags);

	return active;
}

void intel_context_bind_parent_child(struct intel_context *parent,
				     struct intel_context *child)
{
	/*
	 * It is the caller's responsibility to validate that this function is
	 * used correctly, but we use GEM_BUG_ON here to ensure that they do.
	 */
	GEM_BUG_ON(intel_context_is_pinned(parent));
	GEM_BUG_ON(intel_context_is_child(parent));
	GEM_BUG_ON(intel_context_is_pinned(child));
	GEM_BUG_ON(intel_context_is_child(child));
	GEM_BUG_ON(intel_context_is_parent(child));

	parent->parallel.child_index = parent->parallel.number_children++;
	list_add_tail(&child->parallel.child_link,
		      &parent->parallel.child_list);
	child->parallel.parent = parent;
}

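/*
 * Total runtime of @ce in nanoseconds: the accumulated saved runtime
 * (converted from GT clock cycles where the backend reports cycles) plus the
 * currently active period, if the context is running right now.
 */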
u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
{
	u64 total, active;

	if (ce->ops->update_stats)
		ce->ops->update_stats(ce);

	total = ce->stats.runtime.total;
	if (ce->ops->flags & COPS_RUNTIME_CYCLES)
		total *= ce->engine->gt->clock_period_ns;

	active = READ_ONCE(ce->stats.active);
	if (active)
		active = intel_context_clock() - active;

	return total + active;
}

u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
{
	u64 avg = ewma_runtime_read(&ce->stats.runtime.avg);

	if (ce->ops->flags & COPS_RUNTIME_CYCLES)
		avg *= ce->engine->gt->clock_period_ns;

	return avg;
}

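/*
 * Mark @ce as banned and ask the backend to revoke it from the HW using the
 * short "banned" preemption timeout. Returns the previous banned state.
 */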
bool intel_context_ban(struct intel_context *ce, struct i915_request *rq)
{
	bool ret = intel_context_set_banned(ce);

	trace_intel_context_ban(ce);

	if (ce->ops->revoke)
		ce->ops->revoke(ce, rq,
				INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS);

	return ret;
}

bool intel_context_revoke(struct intel_context *ce)
{
	bool ret = intel_context_set_exiting(ce);

	if (ce->ops->revoke)
		ce->ops->revoke(ce, NULL, ce->engine->props.preempt_timeout_ms);

	return ret;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_context.c"
#endif