// SPDX-License-Identifier: MIT
/*
 * Copyright © 2015-2021 Intel Corporation
 */

#include <linux/kthread.h>
#include <linux/string_helpers.h>
#include <trace/events/dma_fence.h>
#include <uapi/linux/sched/types.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"

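/*
 * Default irq_enable/irq_disable hooks: forward to the engine that owns
 * this set of breadcrumbs (see intel_breadcrumbs_create()).
 */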
static bool irq_enable(struct intel_breadcrumbs *b)
{
	return intel_engine_irq_enable(b->irq_engine);
}

static void irq_disable(struct intel_breadcrumbs *b)
{
	intel_engine_irq_disable(b->irq_engine);
}

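/*
 * Enable the breadcrumb interrupt, taking a GT wakeref that is stashed in
 * b->irq_armed and held until __intel_breadcrumbs_disarm_irq(). Called
 * under b->irq_lock.
 */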
static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	intel_wakeref_t wakeref;

	/*
	 * Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference.
	 */
	wakeref = intel_gt_pm_get_if_awake(b->irq_engine->gt);
	if (GEM_WARN_ON(!wakeref))
		return;

	/*
	 * The breadcrumb irq will be disarmed on the interrupt after the
	 * waiters are signaled. This gives us a single interrupt window in
	 * which we can add a new waiter and avoid the cost of re-enabling
	 * the irq.
	 */
	WRITE_ONCE(b->irq_armed, wakeref);

	/* Requests may have completed before we could enable the interrupt. */
	if (!b->irq_enabled++ && b->irq_enable(b))
		irq_work_queue(&b->irq_work);
}

static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	if (!b->irq_engine)
		return;

	spin_lock(&b->irq_lock);
	if (!b->irq_armed)
		__intel_breadcrumbs_arm_irq(b);
	spin_unlock(&b->irq_lock);
}

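/*
 * Drop the interrupt enable refcount and, on the final disable, release
 * the GT wakeref taken when the irq was armed. Called under b->irq_lock.
 */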
static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	intel_wakeref_t wakeref = b->irq_armed;

	GEM_BUG_ON(!b->irq_enabled);
	if (!--b->irq_enabled)
		b->irq_disable(b);

	WRITE_ONCE(b->irq_armed, 0);
	intel_gt_pm_put_async(b->irq_engine->gt, wakeref);
}

static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	spin_lock(&b->irq_lock);
	if (b->irq_armed)
		__intel_breadcrumbs_disarm_irq(b);
	spin_unlock(&b->irq_lock);
}

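/*
 * Track a context on b->signalers so that signal_irq_work() walks its
 * breadcrumbs. The list is RCU protected for the irq worker, with
 * b->signalers_lock serialising updates.
 */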
static void add_signaling_context(struct intel_breadcrumbs *b,
				  struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	spin_lock(&b->signalers_lock);
	list_add_rcu(&ce->signal_link, &b->signalers);
	spin_unlock(&b->signalers_lock);
}

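/*
 * Unlink the context from b->signalers once its last breadcrumb has been
 * removed. Returns true if the context was unlinked, i.e. the caller now
 * owns the context reference taken when it was first added.
 */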
static bool remove_signaling_context(struct intel_breadcrumbs *b,
				     struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	if (!list_empty(&ce->signals))
		return false;

	spin_lock(&b->signalers_lock);
	list_del_rcu(&ce->signal_link);
	spin_unlock(&b->signalers_lock);

	return true;
}

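/*
 * Sanity check that the request sits in seqno order between its
 * neighbours on the context's signal list; only called from within
 * GEM_BUG_ON(), hence __maybe_unused.
 */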
__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
	if (rq->context != ce)
		return false;

	if (!list_is_last(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(rq->fence.seqno,
			      list_next_entry(rq, signal_link)->fence.seqno))
		return false;

	if (!list_is_first(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
			      rq->fence.seqno))
		return false;

	return true;
}

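/*
 * Mark the fence as signaled. Returns true if this call claimed the
 * signal (the bit was previously clear), so the caller knows it is
 * responsible for running the completion callbacks exactly once.
 */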
static bool
__dma_fence_signal(struct dma_fence *fence)
{
	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
}

static void
__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
{
	fence->timestamp = timestamp;
	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
	trace_dma_fence_signaled(fence);
}

static void
__dma_fence_signal__notify(struct dma_fence *fence,
			   const struct list_head *list)
{
	struct dma_fence_cb *cur, *tmp;

	lockdep_assert_held(fence->lock);

	list_for_each_entry_safe(cur, tmp, list, node) {
		INIT_LIST_HEAD(&cur->node);
		cur->func(fence, cur);
	}
}

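/*
 * Queue the timeline for retirement, if this set of breadcrumbs is
 * backed by an engine with a retire worker.
 */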
static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
	if (b->irq_engine)
		intel_engine_add_retire(b->irq_engine, tl);
}

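/*
 * Prepend a node to a singly linked list we own locally. Unlike
 * llist_add(), no atomics are required as the list is private to the
 * irq worker at this point.
 */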
static struct llist_node *
slist_add(struct llist_node *node, struct llist_node *head)
{
	node->next = head;
	return node;
}

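/*
 * The central irq worker: gather all completed breadcrumbs onto a local
 * list under the signaling locks, then invoke the dma-fence callbacks
 * outside of those locks.
 */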
static void signal_irq_work(struct irq_work *work)
{
	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
	const ktime_t timestamp = ktime_get();
	struct llist_node *signal, *sn;
	struct intel_context *ce;

	signal = NULL;
	if (unlikely(!llist_empty(&b->signaled_requests)))
		signal = llist_del_all(&b->signaled_requests);

	/*
	 * Keep the irq armed until the interrupt after all listeners are gone.
	 *
	 * Enabling/disabling the interrupt is rather costly, roughly a couple
	 * of hundred microseconds. If we are proactive and enable/disable
	 * the interrupt around every request that wants a breadcrumb, we
	 * quickly drown in the extra orders of magnitude of latency imposed
	 * on request submission.
	 *
	 * So we try to be lazy, and keep the interrupts enabled until no
	 * more listeners appear within a breadcrumb interrupt interval (that
	 * is until a request completes that no one cares about). The
	 * observation is that listeners come in batches, and will often
	 * listen to a bunch of requests in succession. Though note on icl+,
	 * interrupts are always enabled due to concerns with rc6 being
	 * dysfunctional with per-engine interrupt masking.
	 *
	 * We also try to avoid raising too many interrupts, as they may
	 * be generated by userspace batches and it is unfortunately rather
	 * too easy to drown the CPU under a flood of GPU interrupts. Thus
	 * whenever no one appears to be listening, we turn off the interrupts.
	 * Fewer interrupts should conserve power -- at the very least, fewer
	 * interrupts draw less ire from other users of the system and tools
	 * like powertop.
	 */
	if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
		intel_breadcrumbs_disarm_irq(b);

	rcu_read_lock();
	atomic_inc(&b->signaler_active);
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		struct i915_request *rq;

		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
			bool release;

			if (!__i915_request_is_complete(rq))
				break;

			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
						&rq->fence.flags))
				break;

			/*
			 * Queue for execution after dropping the signaling
			 * spinlock as the callback chain may end up adding
			 * more signalers to the same context or engine.
			 */
			spin_lock(&ce->signal_lock);
			list_del_rcu(&rq->signal_link);
			release = remove_signaling_context(b, ce);
			spin_unlock(&ce->signal_lock);
			if (release) {
				if (intel_timeline_is_last(ce->timeline, rq))
					add_retire(b, ce->timeline);
				intel_context_put(ce);
			}

			if (__dma_fence_signal(&rq->fence))
				/* We own signal_node now, xfer to local list */
				signal = slist_add(&rq->signal_node, signal);
			else
				i915_request_put(rq);
		}
	}
	atomic_dec(&b->signaler_active);
	rcu_read_unlock();

	llist_for_each_safe(signal, sn, signal) {
		struct i915_request *rq =
			llist_entry(signal, typeof(*rq), signal_node);
		struct list_head cb_list;

		if (rq->engine->sched_engine->retire_inflight_request_prio)
			rq->engine->sched_engine->retire_inflight_request_prio(rq);

		spin_lock(&rq->lock);
		list_replace(&rq->fence.cb_list, &cb_list);
		__dma_fence_signal__timestamp(&rq->fence, timestamp);
		__dma_fence_signal__notify(&rq->fence, &cb_list);
		spin_unlock(&rq->lock);

		i915_request_put(rq);
	}

	if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
		intel_breadcrumbs_arm_irq(b);
}

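/*
 * Allocate and initialise a fresh set of breadcrumbs. @irq_engine may be
 * NULL, in which case the interrupt is never armed (see
 * intel_breadcrumbs_arm_irq()).
 */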
struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
	struct intel_breadcrumbs *b;

	b = kzalloc(sizeof(*b), GFP_KERNEL);
	if (!b)
		return NULL;

	kref_init(&b->ref);

	spin_lock_init(&b->signalers_lock);
	INIT_LIST_HEAD(&b->signalers);
	init_llist_head(&b->signaled_requests);

	spin_lock_init(&b->irq_lock);
	init_irq_work(&b->irq_work, signal_irq_work);

	b->irq_engine = irq_engine;
	b->irq_enable = irq_enable;
	b->irq_disable = irq_disable;

	return b;
}

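/*
 * Reapply the current irq_enabled bookkeeping to the hardware, e.g.
 * after a reset has clobbered the interrupt enable state.
 */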
void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
	unsigned long flags;

	if (!b->irq_engine)
		return;

	spin_lock_irqsave(&b->irq_lock, flags);

	if (b->irq_enabled)
		b->irq_enable(b);
	else
		b->irq_disable(b);

	spin_unlock_irqrestore(&b->irq_lock, flags);
}

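/*
 * Flush the breadcrumbs on parking: keep running the irq worker locally
 * until the signalers are drained and the irq is disarmed, releasing its
 * GT wakeref.
 */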
void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
	if (!READ_ONCE(b->irq_armed))
		return;

	/* Kick the work once more to drain the signalers, and disarm the irq */
	irq_work_sync(&b->irq_work);
	while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
		local_irq_disable();
		signal_irq_work(&b->irq_work);
		local_irq_enable();
		cond_resched();
	}
}

void intel_breadcrumbs_free(struct kref *kref)
{
	struct intel_breadcrumbs *b = container_of(kref, typeof(*b), ref);

	irq_work_sync(&b->irq_work);
	GEM_BUG_ON(!list_empty(&b->signalers));
	GEM_BUG_ON(b->irq_armed);

	kfree(b);
}

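/*
 * The request is already complete: claim the fence-signaled bit, take a
 * reference and hand the request over to the irq worker's lock-free list
 * of already-signaled requests. llist_add() returns true only for the
 * first addition, so the worker is queued just once per batch.
 */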
static void irq_signal_request(struct i915_request *rq,
			       struct intel_breadcrumbs *b)
{
	if (!__dma_fence_signal(&rq->fence))
		return;

	i915_request_get(rq);
	if (llist_add(&rq->signal_node, &b->signaled_requests))
		irq_work_queue(&b->irq_work);
}

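/*
 * Attach a breadcrumb for the request onto its context's signal list,
 * keeping that list in seqno order. Called under ce->signal_lock.
 */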
static void insert_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	struct list_head *pos;

	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
		return;

	/*
	 * If the request is already completed, we can transfer it
	 * straight onto a signaled list, and queue the irq worker for
	 * its signal completion.
	 */
	if (__i915_request_is_complete(rq)) {
		irq_signal_request(rq, b);
		return;
	}

	if (list_empty(&ce->signals)) {
		intel_context_get(ce);
		add_signaling_context(b, ce);
		pos = &ce->signals;
	} else {
		/*
		 * We keep the seqno in retirement order, so we can break
		 * inside intel_engine_signal_breadcrumbs as soon as we've
		 * passed the last completed request (or seen a request that
		 * hasn't even started). We could walk the timeline->requests,
		 * but keeping a separate signalers_list has the advantage of
		 * hopefully being much smaller than the full list and so
		 * provides faster iteration and detection when there are no
		 * more interrupts required for this context.
		 *
		 * We typically expect to add new signalers in order, so we
		 * start looking for our insertion point from the tail of
		 * the list.
		 */
		list_for_each_prev(pos, &ce->signals) {
			struct i915_request *it =
				list_entry(pos, typeof(*it), signal_link);

			if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
				break;
		}
	}

	i915_request_get(rq);
	list_add_rcu(&rq->signal_link, pos);
	GEM_BUG_ON(!check_signal_order(ce, rq));
	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);

	/*
	 * Defer enabling the interrupt to after HW submission and recheck
	 * the request as it may have completed and raised the interrupt as
	 * we were attaching it into the lists.
	 */
	if (!b->irq_armed || __i915_request_is_complete(rq))
		irq_work_queue(&b->irq_work);
}

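/*
 * Ask the breadcrumb machinery to watch for the completion of @rq. If
 * the request is not yet active (and not signaled), the breadcrumb is
 * attached later, once the request has been submitted.
 */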
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;

	/* Serialises with i915_request_retire() using rq->lock */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		return true;

	/*
	 * Peek at i915_request_submit()/i915_request_unsubmit() status.
	 *
	 * If the request is not yet active (and not signaled), we will
	 * attach the breadcrumb later.
	 */
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		return true;

	spin_lock(&ce->signal_lock);
	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		insert_breadcrumb(rq);
	spin_unlock(&ce->signal_lock);

	return true;
}

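/*
 * Remove the breadcrumb attached to @rq. If the request completed in the
 * meantime, it is still flushed through the signaled-requests path so
 * that no fence callback is lost.
 */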
void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	bool release;

	spin_lock(&ce->signal_lock);
	if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
		spin_unlock(&ce->signal_lock);
		return;
	}

	list_del_rcu(&rq->signal_link);
	release = remove_signaling_context(b, ce);
	spin_unlock(&ce->signal_lock);
	if (release)
		intel_context_put(ce);

	if (__i915_request_is_complete(rq))
		irq_signal_request(rq, b);

	i915_request_put(rq);
}

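/*
 * Detach all remaining (completed) breadcrumbs from @ce and flush them
 * through the signaled-requests path. The final spin on signaler_active
 * ensures the irq worker has stopped looking at the context before the
 * caller tears it down.
 */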
void intel_context_remove_breadcrumbs(struct intel_context *ce,
				      struct intel_breadcrumbs *b)
{
	struct i915_request *rq, *rn;
	bool release = false;
	unsigned long flags;

	spin_lock_irqsave(&ce->signal_lock, flags);

	if (list_empty(&ce->signals))
		goto unlock;

	list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
		GEM_BUG_ON(!__i915_request_is_complete(rq));
		if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
					&rq->fence.flags))
			continue;

		list_del_rcu(&rq->signal_link);
		irq_signal_request(rq, b);
		i915_request_put(rq);
	}
	release = remove_signaling_context(b, ce);

unlock:
	spin_unlock_irqrestore(&ce->signal_lock, flags);
	if (release)
		intel_context_put(ce);

	while (atomic_read(&b->signaler_active))
		cpu_relax();
}

static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
	struct intel_context *ce;
	struct i915_request *rq;

	drm_printf(p, "Signals:\n");

	rcu_read_lock();
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		list_for_each_entry_rcu(rq, &ce->signals, signal_link)
			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
				   rq->fence.context, rq->fence.seqno,
				   __i915_request_is_complete(rq) ? "!" :
				   __i915_request_has_started(rq) ? "*" :
				   "",
				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
	}
	rcu_read_unlock();
}

void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
				    struct drm_printer *p)
{
	struct intel_breadcrumbs *b;

	b = engine->breadcrumbs;
	if (!b)
		return;

	drm_printf(p, "IRQ: %s\n", str_enabled_disabled(b->irq_armed));
	if (!list_empty(&b->signalers))
		print_signals(b, p);
}