// SPDX-License-Identifier: MIT
/*
 * Copyright © 2015-2021 Intel Corporation
 */

#include <linux/kthread.h>
#include <linux/string_helpers.h>
#include <trace/events/dma_fence.h>
#include <uapi/linux/sched/types.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"

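/*
 * Default irq_enable/irq_disable hooks: forward to the engine that owns
 * this set of breadcrumbs (see intel_breadcrumbs_create()).
 */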
static bool irq_enable(struct intel_breadcrumbs *b)
{
	return intel_engine_irq_enable(b->irq_engine);
}

static void irq_disable(struct intel_breadcrumbs *b)
{
	intel_engine_irq_disable(b->irq_engine);
}

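/*
 * Enable the breadcrumb interrupt, taking a GT wakeref that is stashed in
 * b->irq_armed and held until __intel_breadcrumbs_disarm_irq(). Called
 * under b->irq_lock.
 */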
static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	intel_wakeref_t wakeref;

	/*
	 * Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference.
	 */
	wakeref = intel_gt_pm_get_if_awake(b->irq_engine->gt);
	if (GEM_WARN_ON(!wakeref))
		return;

	/*
	 * The breadcrumb irq will be disarmed on the interrupt after the
	 * waiters are signaled. This gives us a single interrupt window in
	 * which we can add a new waiter and avoid the cost of re-enabling
	 * the irq.
	 */
	WRITE_ONCE(b->irq_armed, wakeref);

	/* Requests may have completed before we could enable the interrupt. */
	if (!b->irq_enabled++ && b->irq_enable(b))
		irq_work_queue(&b->irq_work);
}

static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	if (!b->irq_engine)
		return;

	spin_lock(&b->irq_lock);
	if (!b->irq_armed)
		__intel_breadcrumbs_arm_irq(b);
	spin_unlock(&b->irq_lock);
}

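/*
 * Drop the interrupt enable refcount and, on the final disable, release
 * the GT wakeref taken when the irq was armed. Called under b->irq_lock.
 */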
static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	intel_wakeref_t wakeref = b->irq_armed;

	GEM_BUG_ON(!b->irq_enabled);
	if (!--b->irq_enabled)
		b->irq_disable(b);

	WRITE_ONCE(b->irq_armed, 0);
	intel_gt_pm_put_async(b->irq_engine->gt, wakeref);
}

static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	spin_lock(&b->irq_lock);
	if (b->irq_armed)
		__intel_breadcrumbs_disarm_irq(b);
	spin_unlock(&b->irq_lock);
}

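/*
 * Track a context on b->signalers so that signal_irq_work() walks its
 * breadcrumbs. The list is RCU protected for the irq worker, with
 * b->signalers_lock serialising updates.
 */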
static void add_signaling_context(struct intel_breadcrumbs *b,
				  struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	spin_lock(&b->signalers_lock);
	list_add_rcu(&ce->signal_link, &b->signalers);
	spin_unlock(&b->signalers_lock);
}

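/*
 * Unlink the context from b->signalers once its last breadcrumb has been
 * removed. Returns true if the context was unlinked, i.e. the caller now
 * owns the context reference taken when it was first added.
 */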
static bool remove_signaling_context(struct intel_breadcrumbs *b,
				     struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	if (!list_empty(&ce->signals))
		return false;

	spin_lock(&b->signalers_lock);
	list_del_rcu(&ce->signal_link);
	spin_unlock(&b->signalers_lock);

	return true;
}

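/*
 * Sanity check that the request sits in seqno order between its
 * neighbours on the context's signal list; only called from within
 * GEM_BUG_ON(), hence __maybe_unused.
 */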
__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
	if (rq->context != ce)
		return false;

	if (!list_is_last(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(rq->fence.seqno,
			      list_next_entry(rq, signal_link)->fence.seqno))
		return false;

	if (!list_is_first(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
			      rq->fence.seqno))
		return false;

	return true;
}

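/*
 * Mark the fence as signaled. Returns true if this call claimed the
 * signal (the bit was previously clear), so the caller knows it is
 * responsible for running the completion callbacks exactly once.
 */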
static bool
__dma_fence_signal(struct dma_fence *fence)
{
	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
}

static void
__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
{
	fence->timestamp = timestamp;
	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
	trace_dma_fence_signaled(fence);
}

static void
__dma_fence_signal__notify(struct dma_fence *fence,
			   const struct list_head *list)
{
	struct dma_fence_cb *cur, *tmp;

	lockdep_assert_held(fence->lock);

	list_for_each_entry_safe(cur, tmp, list, node) {
		INIT_LIST_HEAD(&cur->node);
		cur->func(fence, cur);
	}
}

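/*
 * Queue the timeline for retirement, if this set of breadcrumbs is
 * backed by an engine with a retire worker.
 */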
static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
	if (b->irq_engine)
		intel_engine_add_retire(b->irq_engine, tl);
}

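/*
 * Prepend a node to a singly linked list we own locally. Unlike
 * llist_add(), no atomics are required as the list is private to the
 * irq worker at this point.
 */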
static struct llist_node *
slist_add(struct llist_node *node, struct llist_node *head)
{
	node->next = head;
	return node;
}

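/*
 * The central irq worker: gather all completed breadcrumbs onto a local
 * list under the signaling locks, then invoke the dma-fence callbacks
 * outside of those locks.
 */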
static void signal_irq_work(struct irq_work *work)
{
	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
	const ktime_t timestamp = ktime_get();
	struct llist_node *signal, *sn;
	struct intel_context *ce;

	signal = NULL;
	if (unlikely(!llist_empty(&b->signaled_requests)))
		signal = llist_del_all(&b->signaled_requests);

	/*
	 * Keep the irq armed until the interrupt after all listeners are gone.
	 *
	 * Enabling/disabling the interrupt is rather costly, roughly a couple
	 * of hundred microseconds. If we are proactive and enable/disable
	 * the interrupt around every request that wants a breadcrumb, we
	 * quickly drown in the extra orders of magnitude of latency imposed
	 * on request submission.
	 *
	 * So we try to be lazy, and keep the interrupts enabled until no
	 * more listeners appear within a breadcrumb interrupt interval (that
	 * is until a request completes that no one cares about). The
	 * observation is that listeners come in batches, and will often
	 * listen to a bunch of requests in succession. Though note on icl+,
	 * interrupts are always enabled due to concerns with rc6 being
	 * dysfunctional with per-engine interrupt masking.
	 *
	 * We also try to avoid raising too many interrupts, as they may
	 * be generated by userspace batches and it is unfortunately rather
	 * too easy to drown the CPU under a flood of GPU interrupts. Thus
	 * whenever no one appears to be listening, we turn off the interrupts.
	 * Fewer interrupts should conserve power -- at the very least, fewer
	 * interrupts draw less ire from other users of the system and tools
	 * like powertop.
	 */
	if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
		intel_breadcrumbs_disarm_irq(b);

	rcu_read_lock();
	atomic_inc(&b->signaler_active);
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		struct i915_request *rq;

		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
			bool release;

			if (!__i915_request_is_complete(rq))
				break;

			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
						&rq->fence.flags))
				break;

			/*
			 * Queue for execution after dropping the signaling
			 * spinlock as the callback chain may end up adding
			 * more signalers to the same context or engine.
			 */
			spin_lock(&ce->signal_lock);
			list_del_rcu(&rq->signal_link);
			release = remove_signaling_context(b, ce);
			spin_unlock(&ce->signal_lock);
			if (release) {
				if (intel_timeline_is_last(ce->timeline, rq))
					add_retire(b, ce->timeline);
				intel_context_put(ce);
			}

			if (__dma_fence_signal(&rq->fence))
				/* We own signal_node now, xfer to local list */
				signal = slist_add(&rq->signal_node, signal);
			else
				i915_request_put(rq);
		}
	}
	atomic_dec(&b->signaler_active);
	rcu_read_unlock();

	llist_for_each_safe(signal, sn, signal) {
		struct i915_request *rq =
			llist_entry(signal, typeof(*rq), signal_node);
		struct list_head cb_list;

		if (rq->engine->sched_engine->retire_inflight_request_prio)
			rq->engine->sched_engine->retire_inflight_request_prio(rq);

		spin_lock(&rq->lock);
		list_replace(&rq->fence.cb_list, &cb_list);
		__dma_fence_signal__timestamp(&rq->fence, timestamp);
		__dma_fence_signal__notify(&rq->fence, &cb_list);
		spin_unlock(&rq->lock);

		i915_request_put(rq);
	}

	if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
		intel_breadcrumbs_arm_irq(b);
}

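/*
 * Allocate and initialise a fresh set of breadcrumbs. @irq_engine may be
 * NULL, in which case the interrupt is never armed (see
 * intel_breadcrumbs_arm_irq()).
 */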
struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
	struct intel_breadcrumbs *b;

	b = kzalloc(sizeof(*b), GFP_KERNEL);
	if (!b)
		return NULL;

	kref_init(&b->ref);

	spin_lock_init(&b->signalers_lock);
	INIT_LIST_HEAD(&b->signalers);
	init_llist_head(&b->signaled_requests);

	spin_lock_init(&b->irq_lock);
	init_irq_work(&b->irq_work, signal_irq_work);

	b->irq_engine = irq_engine;
	b->irq_enable = irq_enable;
	b->irq_disable = irq_disable;

	return b;
}

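/*
 * Reapply the current irq_enabled bookkeeping to the hardware, e.g.
 * after a reset has clobbered the interrupt enable state.
 */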
void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
	unsigned long flags;

	if (!b->irq_engine)
		return;

	spin_lock_irqsave(&b->irq_lock, flags);

	if (b->irq_enabled)
		b->irq_enable(b);
	else
		b->irq_disable(b);

	spin_unlock_irqrestore(&b->irq_lock, flags);
}

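/*
 * Flush the breadcrumbs on parking: keep running the irq worker locally
 * until the signalers are drained and the irq is disarmed, releasing its
 * GT wakeref.
 */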
void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
	if (!READ_ONCE(b->irq_armed))
		return;

	/* Kick the work once more to drain the signalers, and disarm the irq */
	irq_work_sync(&b->irq_work);
	while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
		local_irq_disable();
		signal_irq_work(&b->irq_work);
		local_irq_enable();
		cond_resched();
	}
}

void intel_breadcrumbs_free(struct kref *kref)
{
	struct intel_breadcrumbs *b = container_of(kref, typeof(*b), ref);

	irq_work_sync(&b->irq_work);
	GEM_BUG_ON(!list_empty(&b->signalers));
	GEM_BUG_ON(b->irq_armed);

	kfree(b);
}

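/*
 * The request is already complete: claim the fence-signaled bit, take a
 * reference and hand the request over to the irq worker's lock-free list
 * of already-signaled requests. llist_add() returns true only for the
 * first addition, so the worker is queued just once per batch.
 */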
static void irq_signal_request(struct i915_request *rq,
			       struct intel_breadcrumbs *b)
{
	if (!__dma_fence_signal(&rq->fence))
		return;

	i915_request_get(rq);
	if (llist_add(&rq->signal_node, &b->signaled_requests))
		irq_work_queue(&b->irq_work);
}

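/*
 * Attach a breadcrumb for the request onto its context's signal list,
 * keeping that list in seqno order. Called under ce->signal_lock.
 */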
static void insert_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	struct list_head *pos;

	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
		return;

	/*
	 * If the request is already completed, we can transfer it
	 * straight onto a signaled list, and queue the irq worker for
	 * its signal completion.
	 */
	if (__i915_request_is_complete(rq)) {
		irq_signal_request(rq, b);
		return;
	}

	if (list_empty(&ce->signals)) {
		intel_context_get(ce);
		add_signaling_context(b, ce);
		pos = &ce->signals;
	} else {
		/*
		 * We keep the seqno in retirement order, so we can break
		 * inside intel_engine_signal_breadcrumbs as soon as we've
		 * passed the last completed request (or seen a request that
		 * hasn't even started). We could walk the timeline->requests,
		 * but keeping a separate signalers_list has the advantage of
		 * hopefully being much smaller than the full list and so
		 * provides faster iteration and detection when there are no
		 * more interrupts required for this context.
		 *
		 * We typically expect to add new signalers in order, so we
		 * start looking for our insertion point from the tail of
		 * the list.
		 */
		list_for_each_prev(pos, &ce->signals) {
			struct i915_request *it =
				list_entry(pos, typeof(*it), signal_link);

			if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
				break;
		}
	}

	i915_request_get(rq);
	list_add_rcu(&rq->signal_link, pos);
	GEM_BUG_ON(!check_signal_order(ce, rq));
	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);

	/*
	 * Defer enabling the interrupt to after HW submission and recheck
	 * the request as it may have completed and raised the interrupt as
	 * we were attaching it into the lists.
	 */
	if (!b->irq_armed || __i915_request_is_complete(rq))
		irq_work_queue(&b->irq_work);
}

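/*
 * Ask the breadcrumb machinery to watch for the completion of @rq. If
 * the request is not yet active (and not signaled), the breadcrumb is
 * attached later, once the request has been submitted.
 */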
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;

	/* Serialises with i915_request_retire() using rq->lock */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		return true;

	/*
	 * Peek at i915_request_submit()/i915_request_unsubmit() status.
	 *
	 * If the request is not yet active (and not signaled), we will
	 * attach the breadcrumb later.
	 */
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		return true;

	spin_lock(&ce->signal_lock);
	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		insert_breadcrumb(rq);
	spin_unlock(&ce->signal_lock);

	return true;
}

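/*
 * Remove the breadcrumb attached to @rq. If the request completed in the
 * meantime, it is still flushed through the signaled-requests path so
 * that no fence callback is lost.
 */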
void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	bool release;

	spin_lock(&ce->signal_lock);
	if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
		spin_unlock(&ce->signal_lock);
		return;
	}

	list_del_rcu(&rq->signal_link);
	release = remove_signaling_context(b, ce);
	spin_unlock(&ce->signal_lock);
	if (release)
		intel_context_put(ce);

	if (__i915_request_is_complete(rq))
		irq_signal_request(rq, b);

	i915_request_put(rq);
}

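/*
 * Detach all remaining (completed) breadcrumbs from @ce and flush them
 * through the signaled-requests path. The final spin on signaler_active
 * ensures the irq worker has stopped looking at the context before the
 * caller tears it down.
 */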
void intel_context_remove_breadcrumbs(struct intel_context *ce,
				      struct intel_breadcrumbs *b)
{
	struct i915_request *rq, *rn;
	bool release = false;
	unsigned long flags;

	spin_lock_irqsave(&ce->signal_lock, flags);

	if (list_empty(&ce->signals))
		goto unlock;

	list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
		GEM_BUG_ON(!__i915_request_is_complete(rq));
		if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
					&rq->fence.flags))
			continue;

		list_del_rcu(&rq->signal_link);
		irq_signal_request(rq, b);
		i915_request_put(rq);
	}
	release = remove_signaling_context(b, ce);

unlock:
	spin_unlock_irqrestore(&ce->signal_lock, flags);
	if (release)
		intel_context_put(ce);

	while (atomic_read(&b->signaler_active))
		cpu_relax();
}

static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
	struct intel_context *ce;
	struct i915_request *rq;

	drm_printf(p, "Signals:\n");

	rcu_read_lock();
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		list_for_each_entry_rcu(rq, &ce->signals, signal_link)
			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
				   rq->fence.context, rq->fence.seqno,
				   __i915_request_is_complete(rq) ? "!" :
				   __i915_request_has_started(rq) ? "*" :
				   "",
				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
	}
	rcu_read_unlock();
}

void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
				    struct drm_printer *p)
{
	struct intel_breadcrumbs *b;

	b = engine->breadcrumbs;
	if (!b)
		return;

	drm_printf(p, "IRQ: %s\n", str_enabled_disabled(b->irq_armed));
	if (!list_empty(&b->signalers))
		print_signals(b, p);
}