i915_request.c source code [linux/drivers/gpu/drm/i915/i915_request.c]

1	/*
2	* Copyright © 2008-2015 Intel Corporation
3	*
4	* Permission is hereby granted, free of charge, to any person obtaining a
5	* copy of this software and associated documentation files (the "Software"),
6	* to deal in the Software without restriction, including without limitation
7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8	* and/or sell copies of the Software, and to permit persons to whom the
9	* Software is furnished to do so, subject to the following conditions:
10	*
11	* The above copyright notice and this permission notice (including the next
12	* paragraph) shall be included in all copies or substantial portions of the
13	* Software.
14	*
15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21	* IN THE SOFTWARE.
22	*
23	*/
24
25	#include <linux/dma-fence-array.h>
26	#include <linux/dma-fence-chain.h>
27	#include <linux/irq_work.h>
28	#include <linux/prefetch.h>
29	#include <linux/sched.h>
30	#include <linux/sched/clock.h>
31	#include <linux/sched/signal.h>
32	#include <linux/sched/mm.h>
33
34	#include "gem/i915_gem_context.h"
35	#include "gt/intel_breadcrumbs.h"
36	#include "gt/intel_context.h"
37	#include "gt/intel_engine.h"
38	#include "gt/intel_engine_heartbeat.h"
39	#include "gt/intel_engine_regs.h"
40	#include "gt/intel_gpu_commands.h"
41	#include "gt/intel_reset.h"
42	#include "gt/intel_ring.h"
43	#include "gt/intel_rps.h"
44
45	#include "i915_active.h"
46	#include "i915_config.h"
47	#include "i915_deps.h"
48	#include "i915_driver.h"
49	#include "i915_drv.h"
50	#include "i915_trace.h"
51
52	struct execute_cb {
53	struct irq_work work;
54	struct i915_sw_fence *fence;
55	struct i915_request *signal;
56	};
57
/* Slab caches used to allocate struct i915_request and struct execute_cb. */
static struct kmem_cache *slab_requests;
static struct kmem_cache *slab_execute_cbs;
60
61	static const char i915_fence_get_driver_name(struct* dma_fence *fence)
62	{
63	return dev_name(dev: to_request(fence)->i915->drm.dev);
64	}
65
66	static const char i915_fence_get_timeline_name(struct* dma_fence *fence)
67	{
68	const struct i915_gem_context *ctx;
69
70	/*
71	* The timeline struct (as part of the ppgtt underneath a context)
72	* may be freed when the request is no longer in use by the GPU.
73	* We could extend the life of a context to beyond that of all
74	* fences, possibly keeping the hw resource around indefinitely,
75	* or we just give them a false name. Since
76	* dma_fence_ops.get_timeline_name is a debug feature, the occasional
77	* lie seems justifiable.
78	*/
79	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
80	return "signaled";
81
82	ctx = i915_request_gem_context(rq: to_request(fence));
83	if (!ctx)
84	return "[" DRIVER_NAME "]";
85
86	return ctx->name;
87	}
88
/* dma_fence_ops.signaled: true once the underlying request has completed. */
static bool i915_fence_signaled(struct dma_fence *fence)
{
	return i915_request_completed(to_request(fence));
}
93
/*
 * dma_fence_ops.enable_signaling: forward to the breadcrumb code and
 * return its result.
 */
static bool i915_fence_enable_signaling(struct dma_fence *fence)
{
	return i915_request_enable_breadcrumb(to_request(fence));
}
98
99	static signed long i915_fence_wait(struct dma_fence *fence,
100	bool interruptible,
101	signed long timeout)
102	{
103	return i915_request_wait_timeout(rq: to_request(fence),
104	flags: interruptible \| I915_WAIT_PRIORITY,
105	timeout);
106	}
107
108	struct kmem_cache i915_request_slab_cache(void*)
109	{
110	return slab_requests;
111	}
112
113	static void i915_fence_release(struct dma_fence *fence)
114	{
115	struct i915_request *rq = to_request(fence);
116
117	GEM_BUG_ON(rq->guc_prio != GUC_PRIO_INIT &&
118	rq->guc_prio != GUC_PRIO_FINI);
119
120	i915_request_free_capture_list(fetch_and_zero(&rq->capture_list));
121	if (rq->batch_res) {
122	i915_vma_resource_put(vma_res: rq->batch_res);
123	rq->batch_res = NULL;
124	}
125
126	/*
127	* The request is put onto a RCU freelist (i.e. the address
128	* is immediately reused), mark the fences as being freed now.
129	* Otherwise the debugobjects for the fences are only marked as
130	* freed when the slab cache itself is freed, and so we would get
131	* caught trying to reuse dead objects.
132	*/
133	i915_sw_fence_fini(fence: &rq->submit);
134	i915_sw_fence_fini(fence: &rq->semaphore);
135
136	/*
137	* Keep one request on each engine for reserved use under mempressure.
138	*
139	* We do not hold a reference to the engine here and so have to be
140	* very careful in what rq->engine we poke. The virtual engine is
141	* referenced via the rq->context and we released that ref during
142	* i915_request_retire(), ergo we must not dereference a virtual
143	* engine here. Not that we would want to, as the only consumer of
144	* the reserved engine->request_pool is the power management parking,
145	* which must-not-fail, and that is only run on the physical engines.
146	*
147	* Since the request must have been executed to be have completed,
148	* we know that it will have been processed by the HW and will
149	* not be unsubmitted again, so rq->engine and rq->execution_mask
150	* at this point is stable. rq->execution_mask will be a single
151	* bit if the last and _only_ engine it could execution on was a
152	* physical engine, if it's multiple bits then it started on and
153	* could still be on a virtual engine. Thus if the mask is not a
154	* power-of-two we assume that rq->engine may still be a virtual
155	* engine and so a dangling invalid pointer that we cannot dereference
156	*
157	* For example, consider the flow of a bonded request through a virtual
158	* engine. The request is created with a wide engine mask (all engines
159	* that we might execute on). On processing the bond, the request mask
160	* is reduced to one or more engines. If the request is subsequently
161	* bound to a single engine, it will then be constrained to only
162	* execute on that engine and never returned to the virtual engine
163	* after timeslicing away, see __unwind_incomplete_requests(). Thus we
164	* know that if the rq->execution_mask is a single bit, rq->engine
165	* can be a physical engine with the exact corresponding mask.
166	*/
167	if (is_power_of_2(n: rq->execution_mask) &&
168	!cmpxchg(&rq->engine->request_pool, NULL, rq))
169	return;
170
171	kmem_cache_free(s: slab_requests, objp: rq);
172	}
173
174	const struct dma_fence_ops i915_fence_ops = {
175	.get_driver_name = i915_fence_get_driver_name,
176	.get_timeline_name = i915_fence_get_timeline_name,
177	.enable_signaling = i915_fence_enable_signaling,
178	.signaled = i915_fence_signaled,
179	.wait = i915_fence_wait,
180	.release = i915_fence_release,
181	};
182
183	static void irq_execute_cb(struct irq_work *wrk)
184	{
185	struct execute_cb cb = container_of(wrk, typeof(cb), work);
186
187	i915_sw_fence_complete(fence: cb->fence);
188	kmem_cache_free(s: slab_execute_cbs, objp: cb);
189	}
190
191	static __always_inline void
192	__notify_execute_cb(struct i915_request rq, bool (fn)(struct irq_work *wrk))
193	{
194	struct execute_cb cb, cn;
195
196	if (llist_empty(head: &rq->execute_cb))
197	return;
198
199	llist_for_each_entry_safe(cb, cn,
200	llist_del_all(&rq->execute_cb),
201	work.node.llist)
202	fn(&cb->work);
203	}
204
205	static void __notify_execute_cb_irq(struct i915_request *rq)
206	{
207	__notify_execute_cb(rq, fn: irq_work_queue);
208	}
209
210	static bool irq_work_imm(struct irq_work *wrk)
211	{
212	wrk->func(wrk);
213	return false;
214	}
215
216	void i915_request_notify_execute_cb_imm(struct i915_request *rq)
217	{
218	__notify_execute_cb(rq, fn: irq_work_imm);
219	}
220
221	static void __i915_request_fill(struct i915_request *rq, u8 val)
222	{
223	void *vaddr = rq->ring->vaddr;
224	u32 head;
225
226	head = rq->infix;
227	if (rq->postfix < head) {
228	memset(vaddr + head, val, rq->ring->size - head);
229	head = `0`;
230	}
231	memset(vaddr + head, val, rq->postfix - head);
232	}
233
234	/**
235	* i915_request_active_engine
236	* @rq: request to inspect
237	* @active: pointer in which to return the active engine
238	*
239	* Fills the currently active engine to the @active pointer if the request
240	* is active and still not completed.
241	*
242	* Returns true if request was active or false otherwise.
243	*/
244	bool
245	i915_request_active_engine(struct i915_request *rq,
246	struct intel_engine_cs **active)
247	{
248	struct intel_engine_cs engine, locked;
249	bool ret = false;
250
251	/*
252	* Serialise with __i915_request_submit() so that it sees
253	* is-banned?, or we know the request is already inflight.
254	*
255	* Note that rq->engine is unstable, and so we double
256	* check that we have acquired the lock on the final engine.
257	*/
258	locked = READ_ONCE(rq->engine);
259	spin_lock_irq(lock: &locked->sched_engine->lock);
260	while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
261	spin_unlock(lock: &locked->sched_engine->lock);
262	locked = engine;
263	spin_lock(lock: &locked->sched_engine->lock);
264	}
265
266	if (i915_request_is_active(rq)) {
267	if (!__i915_request_is_complete(rq))
268	*active = locked;
269	ret = true;
270	}
271
272	spin_unlock_irq(lock: &locked->sched_engine->lock);
273
274	return ret;
275	}
276
277	static void __rq_init_watchdog(struct i915_request *rq)
278	{
279	rq->watchdog.timer.function = NULL;
280	}
281
282	static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
283	{
284	struct i915_request *rq =
285	container_of(hrtimer, struct i915_request, watchdog.timer);
286	struct intel_gt *gt = rq->engine->gt;
287
288	if (!i915_request_completed(rq)) {
289	if (llist_add(new: &rq->watchdog.link, head: &gt->watchdog.list))
290	queue_work(wq: gt->i915->unordered_wq, work: &gt->watchdog.work);
291	} else {
292	i915_request_put(rq);
293	}
294
295	return HRTIMER_NORESTART;
296	}
297
298	static void __rq_arm_watchdog(struct i915_request *rq)
299	{
300	struct i915_request_watchdog *wdg = &rq->watchdog;
301	struct intel_context *ce = rq->context;
302
303	if (!ce->watchdog.timeout_us)
304	return;
305
306	i915_request_get(rq);
307
308	hrtimer_init(timer: &wdg->timer, CLOCK_MONOTONIC, mode: HRTIMER_MODE_REL);
309	wdg->timer.function = __rq_watchdog_expired;
310	hrtimer_start_range_ns(timer: &wdg->timer,
311	tim: ns_to_ktime(ns: ce->watchdog.timeout_us *
312	NSEC_PER_USEC),
313	NSEC_PER_MSEC,
314	mode: HRTIMER_MODE_REL);
315	}
316
317	static void __rq_cancel_watchdog(struct i915_request *rq)
318	{
319	struct i915_request_watchdog *wdg = &rq->watchdog;
320
321	if (wdg->timer.function && hrtimer_try_to_cancel(timer: &wdg->timer) > `0`)
322	i915_request_put(rq);
323	}
324
325	#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
326
327	/**
328	* i915_request_free_capture_list - Free a capture list
329	* @capture: Pointer to the first list item or NULL
330	*
331	*/
332	void i915_request_free_capture_list(struct i915_capture_list *capture)
333	{
334	while (capture) {
335	struct i915_capture_list *next = capture->next;
336
337	i915_vma_resource_put(vma_res: capture->vma_res);
338	kfree(objp: capture);
339	capture = next;
340	}
341	}
342
343	#define assert_capture_list_is_null(_rq) GEM_BUG_ON((_rq)->capture_list)
344
345	#define clear_capture_list(_rq) ((_rq)->capture_list = NULL)
346
347	#else
348
349	#define i915_request_free_capture_list(_a) do {} while (0)
350
351	#define assert_capture_list_is_null(_a) do {} while (0)
352
353	#define clear_capture_list(_rq) do {} while (0)
354
355	#endif
356
357	bool i915_request_retire(struct i915_request *rq)
358	{
359	if (!__i915_request_is_complete(rq))
360	return false;
361
362	RQ_TRACE(rq, "\n");
363
364	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
365	trace_i915_request_retire(rq);
366	i915_request_mark_complete(rq);
367
368	__rq_cancel_watchdog(rq);
369
370	/*
371	* We know the GPU must have read the request to have
372	* sent us the seqno + interrupt, so use the position
373	* of tail of the request to update the last known position
374	* of the GPU head.
375	*
376	* Note this requires that we are always called in request
377	* completion order.
378	*/
379	GEM_BUG_ON(!list_is_first(&rq->link,
380	&i915_request_timeline(rq)->requests));
381	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
382	/ Poison before we release our space in the ring /
383	__i915_request_fill(rq, POISON_FREE);
384	rq->ring->head = rq->postfix;
385
386	if (!i915_request_signaled(rq)) {
387	spin_lock_irq(lock: &rq->lock);
388	dma_fence_signal_locked(fence: &rq->fence);
389	spin_unlock_irq(lock: &rq->lock);
390	}
391
392	if (test_and_set_bit(nr: I915_FENCE_FLAG_BOOST, addr: &rq->fence.flags))
393	intel_rps_dec_waiters(rps: &rq->engine->gt->rps);
394
395	/*
396	* We only loosely track inflight requests across preemption,
397	* and so we may find ourselves attempting to retire a _completed_
398	* request that we have removed from the HW and put back on a run
399	* queue.
400	*
401	* As we set I915_FENCE_FLAG_ACTIVE on the request, this should be
402	* after removing the breadcrumb and signaling it, so that we do not
403	* inadvertently attach the breadcrumb to a completed request.
404	*/
405	rq->engine->remove_active_request(rq);
406	GEM_BUG_ON(!llist_empty(&rq->execute_cb));
407
408	__list_del_entry(entry: &rq->link); / poison neither prev/next (RCU walks) /
409
410	intel_context_exit(ce: rq->context);
411	intel_context_unpin(ce: rq->context);
412
413	i915_sched_node_fini(node: &rq->sched);
414	i915_request_put(rq);
415
416	return true;
417	}
418
419	void i915_request_retire_upto(struct i915_request *rq)
420	{
421	struct intel_timeline * const tl = i915_request_timeline(rq);
422	struct i915_request *tmp;
423
424	RQ_TRACE(rq, "\n");
425	GEM_BUG_ON(!__i915_request_is_complete(rq));
426
427	do {
428	tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
429	GEM_BUG_ON(!i915_request_completed(tmp));
430	} while (i915_request_retire(rq: tmp) && tmp != rq);
431	}
432
433	static struct i915_request * const *
434	__engine_active(struct intel_engine_cs *engine)
435	{
436	return READ_ONCE(engine->execlists.active);
437	}
438
439	static bool __request_in_flight(const struct i915_request *signal)
440	{
441	struct i915_request * const port, rq;
442	bool inflight = false;
443
444	if (!i915_request_is_ready(rq: signal))
445	return false;
446
447	/*
448	* Even if we have unwound the request, it may still be on
449	* the GPU (preempt-to-busy). If that request is inside an
450	* unpreemptible critical section, it will not be removed. Some
451	* GPU functions may even be stuck waiting for the paired request
452	* (__await_execution) to be submitted and cannot be preempted
453	* until the bond is executing.
454	*
455	* As we know that there are always preemption points between
456	* requests, we know that only the currently executing request
457	* may be still active even though we have cleared the flag.
458	* However, we can't rely on our tracking of ELSP[0] to know
459	* which request is currently active and so maybe stuck, as
460	* the tracking maybe an event behind. Instead assume that
461	* if the context is still inflight, then it is still active
462	* even if the active flag has been cleared.
463	*
464	* To further complicate matters, if there a pending promotion, the HW
465	* may either perform a context switch to the second inflight execlists,
466	* or it may switch to the pending set of execlists. In the case of the
467	* latter, it may send the ACK and we process the event copying the
468	* pending[] over top of inflight[], _overwriting_ our *active. Since
469	* this implies the HW is arbitrating and not struck in *active, we do
470	* not worry about complete accuracy, but we do require no read/write
471	* tearing of the pointer [the read of the pointer must be valid, even
472	* as the array is being overwritten, for which we require the writes
473	* to avoid tearing.]
474	*
475	* Note that the read of *execlists->active may race with the promotion
476	* of execlists->pending[] to execlists->inflight[], overwritting
477	* the value at *execlists->active. This is fine. The promotion implies
478	* that we received an ACK from the HW, and so the context is not
479	* stuck -- if we do not see ourselves in *active, the inflight status
480	* is valid. If instead we see ourselves being copied into *active,
481	* we are inflight and may signal the callback.
482	*/
483	if (!intel_context_inflight(signal->context))
484	return false;
485
486	rcu_read_lock();
487	for (port = __engine_active(engine: signal->engine);
488	(rq = READ_ONCE(port)); /* may race with promotion of pending[] /
489	port++) {
490	if (rq->context == signal->context) {
491	inflight = i915_seqno_passed(seq1: rq->fence.seqno,
492	seq2: signal->fence.seqno);
493	break;
494	}
495	}
496	rcu_read_unlock();
497
498	return inflight;
499	}
500
501	static int
502	__await_execution(struct i915_request *rq,
503	struct i915_request *signal,
504	gfp_t gfp)
505	{
506	struct execute_cb *cb;
507
508	if (i915_request_is_active(rq: signal))
509	return `0`;
510
511	cb = kmem_cache_alloc(cachep: slab_execute_cbs, flags: gfp);
512	if (!cb)
513	return -ENOMEM;
514
515	cb->fence = &rq->submit;
516	i915_sw_fence_await(fence: cb->fence);
517	init_irq_work(work: &cb->work, func: irq_execute_cb);
518
519	/*
520	* Register the callback first, then see if the signaler is already
521	* active. This ensures that if we race with the
522	* __notify_execute_cb from i915_request_submit() and we are not
523	* included in that list, we get a second bite of the cherry and
524	* execute it ourselves. After this point, a future
525	* i915_request_submit() will notify us.
526	*
527	* In i915_request_retire() we set the ACTIVE bit on a completed
528	* request (then flush the execute_cb). So by registering the
529	* callback first, then checking the ACTIVE bit, we serialise with
530	* the completed/retired request.
531	*/
532	if (llist_add(new: &cb->work.node.llist, head: &signal->execute_cb)) {
533	if (i915_request_is_active(rq: signal) \|\|
534	__request_in_flight(signal))
535	i915_request_notify_execute_cb_imm(rq: signal);
536	}
537
538	return `0`;
539	}
540
/*
 * Classify a fence error code.
 *
 * Returns false for 0, -EAGAIN and -ETIMEDOUT, which are treated as
 * non-fatal, and true for every other value.
 */
static bool fatal_error(int error)
{
	switch (error) {
	case 0: /* not an error! */
	case -EAGAIN: /* innocent victim of a GT reset (__i915_request_reset) */
	case -ETIMEDOUT: /* waiting for Godot (timer_i915_sw_fence_wake) */
		return false;
	default:
		return true;
	}
}
552
553	void __i915_request_skip(struct i915_request *rq)
554	{
555	GEM_BUG_ON(!fatal_error(rq->fence.error));
556
557	if (rq->infix == rq->postfix)
558	return;
559
560	RQ_TRACE(rq, "error: %d\n", rq->fence.error);
561
562	/*
563	* As this request likely depends on state from the lost
564	* context, clear out all the user operations leaving the
565	* breadcrumb at the end (so we get the fence notifications).
566	*/
567	__i915_request_fill(rq, val: `0`);
568	rq->infix = rq->postfix;
569	}
570
571	bool i915_request_set_error_once(struct i915_request rq, int* error)
572	{
573	int old;
574
575	GEM_BUG_ON(!IS_ERR_VALUE((long)error));
576
577	if (i915_request_signaled(rq))
578	return false;
579
580	old = READ_ONCE(rq->fence.error);
581	do {
582	if (fatal_error(error: old))
583	return false;
584	} while (!try_cmpxchg(&rq->fence.error, &old, error));
585
586	return true;
587	}
588
/*
 * Force an incomplete request to complete with -EIO.
 *
 * Returns NULL if @rq had already completed. Otherwise returns @rq with
 * an extra reference taken, which the caller is expected to release.
 */
struct i915_request *i915_request_mark_eio(struct i915_request *rq)
{
	if (__i915_request_is_complete(rq))
		return NULL;

	GEM_BUG_ON(i915_request_signaled(rq));

	/* As soon as the request is completed, it may be retired */
	rq = i915_request_get(rq);

	i915_request_set_error_once(rq, -EIO);
	i915_request_mark_complete(rq);

	return rq;
}
604
605	bool __i915_request_submit(struct i915_request *request)
606	{
607	struct intel_engine_cs *engine = request->engine;
608	bool result = false;
609
610	RQ_TRACE(request, "\n");
611
612	GEM_BUG_ON(!irqs_disabled());
613	lockdep_assert_held(&engine->sched_engine->lock);
614
615	/*
616	* With the advent of preempt-to-busy, we frequently encounter
617	* requests that we have unsubmitted from HW, but left running
618	* until the next ack and so have completed in the meantime. On
619	* resubmission of that completed request, we can skip
620	* updating the payload, and execlists can even skip submitting
621	* the request.
622	*
623	* We must remove the request from the caller's priority queue,
624	* and the caller must only call us when the request is in their
625	* priority queue, under the sched_engine->lock. This ensures that the
626	* request has not yet been retired and we can safely move
627	* the request into the engine->active.list where it will be
628	* dropped upon retiring. (Otherwise if resubmit a retired
629	* request, this would be a horrible use-after-free.)
630	*/
631	if (__i915_request_is_complete(rq: request)) {
632	list_del_init(entry: &request->sched.link);
633	goto active;
634	}
635
636	if (unlikely(!intel_context_is_schedulable(request->context)))
637	i915_request_set_error_once(rq: request, error: -EIO);
638
639	if (unlikely(fatal_error(request->fence.error)))
640	__i915_request_skip(rq: request);
641
642	/*
643	* Are we using semaphores when the gpu is already saturated?
644	*
645	* Using semaphores incurs a cost in having the GPU poll a
646	* memory location, busywaiting for it to change. The continual
647	* memory reads can have a noticeable impact on the rest of the
648	* system with the extra bus traffic, stalling the cpu as it too
649	* tries to access memory across the bus (perf stat -e bus-cycles).
650	*
651	* If we installed a semaphore on this request and we only submit
652	* the request after the signaler completed, that indicates the
653	* system is overloaded and using semaphores at this time only
654	* increases the amount of work we are doing. If so, we disable
655	* further use of semaphores until we are idle again, whence we
656	* optimistically try again.
657	*/
658	if (request->sched.semaphores &&
659	i915_sw_fence_signaled(fence: &request->semaphore))
660	engine->saturated \|= request->sched.semaphores;
661
662	engine->emit_fini_breadcrumb(request,
663	request->ring->vaddr + request->postfix);
664
665	trace_i915_request_execute(rq: request);
666	if (engine->bump_serial)
667	engine->bump_serial(engine);
668	else
669	engine->serial++;
670
671	result = true;
672
673	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
674	engine->add_active_request(request);
675	active:
676	clear_bit(nr: I915_FENCE_FLAG_PQUEUE, addr: &request->fence.flags);
677	set_bit(nr: I915_FENCE_FLAG_ACTIVE, addr: &request->fence.flags);
678
679	/*
680	* XXX Rollback bonded-execution on __i915_request_unsubmit()?
681	*
682	* In the future, perhaps when we have an active time-slicing scheduler,
683	* it will be interesting to unsubmit parallel execution and remove
684	* busywaits from the GPU until their master is restarted. This is
685	* quite hairy, we have to carefully rollback the fence and do a
686	* preempt-to-idle cycle on the target engine, all the while the
687	* master execute_cb may refire.
688	*/
689	__notify_execute_cb_irq(rq: request);
690
691	/ We may be recursing from the signal callback of another i915 fence /
692	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
693	i915_request_enable_breadcrumb(request);
694
695	return result;
696	}
697
698	void i915_request_submit(struct i915_request *request)
699	{
700	struct intel_engine_cs *engine = request->engine;
701	unsigned long flags;
702
703	/ Will be called from irq-context when using foreign fences. /
704	spin_lock_irqsave(&engine->sched_engine->lock, flags);
705
706	__i915_request_submit(request);
707
708	spin_unlock_irqrestore(lock: &engine->sched_engine->lock, flags);
709	}
710
711	void __i915_request_unsubmit(struct i915_request *request)
712	{
713	struct intel_engine_cs *engine = request->engine;
714
715	/*
716	* Only unwind in reverse order, required so that the per-context list
717	* is kept in seqno/ring order.
718	*/
719	RQ_TRACE(request, "\n");
720
721	GEM_BUG_ON(!irqs_disabled());
722	lockdep_assert_held(&engine->sched_engine->lock);
723
724	/*
725	* Before we remove this breadcrumb from the signal list, we have
726	* to ensure that a concurrent dma_fence_enable_signaling() does not
727	* attach itself. We first mark the request as no longer active and
728	* make sure that is visible to other cores, and then remove the
729	* breadcrumb if attached.
730	*/
731	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
732	clear_bit_unlock(nr: I915_FENCE_FLAG_ACTIVE, addr: &request->fence.flags);
733	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
734	i915_request_cancel_breadcrumb(request);
735
736	/ We've already spun, don't charge on resubmitting. /
737	if (request->sched.semaphores && __i915_request_has_started(rq: request))
738	request->sched.semaphores = `0`;
739
740	/*
741	* We don't need to wake_up any waiters on request->execute, they
742	* will get woken by any other event or us re-adding this request
743	* to the engine timeline (__i915_request_submit()). The waiters
744	* should be quite adapt at finding that the request now has a new
745	* global_seqno to the one they went to sleep on.
746	*/
747	}
748
749	void i915_request_unsubmit(struct i915_request *request)
750	{
751	struct intel_engine_cs *engine = request->engine;
752	unsigned long flags;
753
754	/ Will be called from irq-context when using foreign fences. /
755	spin_lock_irqsave(&engine->sched_engine->lock, flags);
756
757	__i915_request_unsubmit(request);
758
759	spin_unlock_irqrestore(lock: &engine->sched_engine->lock, flags);
760	}
761
762	void i915_request_cancel(struct i915_request rq, int* error)
763	{
764	if (!i915_request_set_error_once(rq, error))
765	return;
766
767	set_bit(nr: I915_FENCE_FLAG_SENTINEL, addr: &rq->fence.flags);
768
769	intel_context_cancel_request(ce: rq->context, rq);
770	}
771
772	static int
773	submit_notify(struct i915_sw_fence fence, enum* i915_sw_fence_notify state)
774	{
775	struct i915_request *request =
776	container_of(fence, typeof(*request), submit);
777
778	switch (state) {
779	case FENCE_COMPLETE:
780	trace_i915_request_submit(rq: request);
781
782	if (unlikely(fence->error))
783	i915_request_set_error_once(rq: request, error: fence->error);
784	else
785	__rq_arm_watchdog(rq: request);
786
787	/*
788	* We need to serialize use of the submit_request() callback
789	* with its hotplugging performed during an emergency
790	* i915_gem_set_wedged(). We use the RCU mechanism to mark the
791	* critical section in order to force i915_gem_set_wedged() to
792	* wait until the submit_request() is completed before
793	* proceeding.
794	*/
795	rcu_read_lock();
796	request->engine->submit_request(request);
797	rcu_read_unlock();
798	break;
799
800	case FENCE_FREE:
801	i915_request_put(rq: request);
802	break;
803	}
804
805	return NOTIFY_DONE;
806	}
807
808	static int
809	semaphore_notify(struct i915_sw_fence fence, enum* i915_sw_fence_notify state)
810	{
811	struct i915_request rq = container_of(fence, typeof(rq), semaphore);
812
813	switch (state) {
814	case FENCE_COMPLETE:
815	break;
816
817	case FENCE_FREE:
818	i915_request_put(rq);
819	break;
820	}
821
822	return NOTIFY_DONE;
823	}
824
825	static void retire_requests(struct intel_timeline *tl)
826	{
827	struct i915_request rq, rn;
828
829	list_for_each_entry_safe(rq, rn, &tl->requests, link)
830	if (!i915_request_retire(rq))
831	break;
832	}
833
834	static noinline struct i915_request *
835	request_alloc_slow(struct intel_timeline *tl,
836	struct i915_request **rsvd,
837	gfp_t gfp)
838	{
839	struct i915_request *rq;
840
841	/ If we cannot wait, dip into our reserves /
842	if (!gfpflags_allow_blocking(gfp_flags: gfp)) {
843	rq = xchg(rsvd, NULL);
844	if (!rq) / Use the normal failure path for one final WARN /
845	goto out;
846
847	return rq;
848	}
849
850	if (list_empty(head: &tl->requests))
851	goto out;
852
853	/ Move our oldest request to the slab-cache (if not in use!) /
854	rq = list_first_entry(&tl->requests, typeof(*rq), link);
855	i915_request_retire(rq);
856
857	rq = kmem_cache_alloc(cachep: slab_requests,
858	flags: gfp \| __GFP_RETRY_MAYFAIL \| __GFP_NOWARN);
859	if (rq)
860	return rq;
861
862	/ Ratelimit ourselves to prevent oom from malicious clients /
863	rq = list_last_entry(&tl->requests, typeof(*rq), link);
864	cond_synchronize_rcu(oldstate: rq->rcustate);
865
866	/ Retire our old requests in the hope that we free some /
867	retire_requests(tl);
868
869	out:
870	return kmem_cache_alloc(cachep: slab_requests, flags: gfp);
871	}
872
873	static void __i915_request_ctor(void *arg)
874	{
875	struct i915_request *rq = arg;
876
877	spin_lock_init(&rq->lock);
878	i915_sched_node_init(node: &rq->sched);
879	i915_sw_fence_init(&rq->submit, submit_notify);
880	i915_sw_fence_init(&rq->semaphore, semaphore_notify);
881
882	clear_capture_list(rq);
883	rq->batch_res = NULL;
884
885	init_llist_head(list: &rq->execute_cb);
886	}
887
888	#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
889	#define clear_batch_ptr(_rq) ((_rq)->batch = NULL)
890	#else
891	#define clear_batch_ptr(_a) do {} while (0)
892	#endif
893
894	struct i915_request *
895	__i915_request_create(struct intel_context *ce, gfp_t gfp)
896	{
897	struct intel_timeline *tl = ce->timeline;
898	struct i915_request *rq;
899	u32 seqno;
900	int ret;
901
902	might_alloc(gfp_mask: gfp);
903
904	/ Check that the caller provided an already pinned context /
905	__intel_context_pin(ce);
906
907	/*
908	* Beware: Dragons be flying overhead.
909	*
910	* We use RCU to look up requests in flight. The lookups may
911	* race with the request being allocated from the slab freelist.
912	* That is the request we are writing to here, may be in the process
913	* of being read by __i915_active_request_get_rcu(). As such,
914	* we have to be very careful when overwriting the contents. During
915	* the RCU lookup, we change chase the request->engine pointer,
916	* read the request->global_seqno and increment the reference count.
917	*
918	* The reference count is incremented atomically. If it is zero,
919	* the lookup knows the request is unallocated and complete. Otherwise,
920	* it is either still in use, or has been reallocated and reset
921	* with dma_fence_init(). This increment is safe for release as we
922	* check that the request we have a reference to and matches the active
923	* request.
924	*
925	* Before we increment the refcount, we chase the request->engine
926	* pointer. We must not call kmem_cache_zalloc() or else we set
927	* that pointer to NULL and cause a crash during the lookup. If
928	* we see the request is completed (based on the value of the
929	* old engine and seqno), the lookup is complete and reports NULL.
930	* If we decide the request is not completed (new engine or seqno),
931	* then we grab a reference and double check that it is still the
932	* active request - which it won't be and restart the lookup.
933	*
934	* Do not use kmem_cache_zalloc() here!
935	*/
936	rq = kmem_cache_alloc(cachep: slab_requests,
937	flags: gfp \| __GFP_RETRY_MAYFAIL \| __GFP_NOWARN);
938	if (unlikely(!rq)) {
939	rq = request_alloc_slow(tl, rsvd: &ce->engine->request_pool, gfp);
940	if (!rq) {
941	ret = -ENOMEM;
942	goto err_unreserve;
943	}
944	}
945
946	rq->context = ce;
947	rq->engine = ce->engine;
948	rq->ring = ce->ring;
949	rq->execution_mask = ce->engine->mask;
950	rq->i915 = ce->engine->i915;
951
952	ret = intel_timeline_get_seqno(tl, rq, seqno: &seqno);
953	if (ret)
954	goto err_free;
955
956	dma_fence_init(fence: &rq->fence, ops: &i915_fence_ops, lock: &rq->lock,
957	context: tl->fence_context, seqno);
958
959	RCU_INIT_POINTER(rq->timeline, tl);
960	rq->hwsp_seqno = tl->hwsp_seqno;
961	GEM_BUG_ON(__i915_request_is_complete(rq));
962
963	rq->rcustate = get_state_synchronize_rcu(); / acts as smp_mb() /
964
965	rq->guc_prio = GUC_PRIO_INIT;
966
967	/ We bump the ref for the fence chain /
968	i915_sw_fence_reinit(fence: &i915_request_get(rq)->submit);
969	i915_sw_fence_reinit(fence: &i915_request_get(rq)->semaphore);
970
971	i915_sched_node_reinit(node: &rq->sched);
972
973	/ No zalloc, everything must be cleared after use /
974	clear_batch_ptr(rq);
975	__rq_init_watchdog(rq);
976	assert_capture_list_is_null(rq);
977	GEM_BUG_ON(!llist_empty(&rq->execute_cb));
978	GEM_BUG_ON(rq->batch_res);
979
980	/*
981	* Reserve space in the ring buffer for all the commands required to
982	* eventually emit this request. This is to guarantee that the
983	* i915_request_add() call can't fail. Note that the reserve may need
984	* to be redone if the request is not actually submitted straight
985	* away, e.g. because a GPU scheduler has deferred it.
986	*
987	* Note that due to how we add reserved_space to intel_ring_begin()
988	* we need to double our request to ensure that if we need to wrap
989	* around inside i915_request_add() there is sufficient space at
990	* the beginning of the ring as well.
991	*/
992	rq->reserved_space =
993	`2` * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32);
994
995	/*
996	* Record the position of the start of the request so that
997	* should we detect the updated seqno part-way through the
998	* GPU processing the request, we never over-estimate the
999	* position of the head.
1000	*/
1001	rq->head = rq->ring->emit;
1002
1003	ret = rq->engine->request_alloc(rq);
1004	if (ret)
1005	goto err_unwind;
1006
1007	rq->infix = rq->ring->emit; / end of header; start of user payload /
1008
1009	intel_context_mark_active(ce);
1010	list_add_tail_rcu(new: &rq->link, head: &tl->requests);
1011
1012	return rq;
1013
1014	err_unwind:
1015	ce->ring->emit = rq->head;
1016
1017	/ Make sure we didn't add ourselves to external state before freeing /
1018	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
1019	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
1020
1021	err_free:
1022	kmem_cache_free(s: slab_requests, objp: rq);
1023	err_unreserve:
1024	intel_context_unpin(ce);
1025	return ERR_PTR(error: ret);
1026	}
1027
1028	struct i915_request *
1029	i915_request_create(struct intel_context *ce)
1030	{
1031	struct i915_request *rq;
1032	struct intel_timeline *tl;
1033
1034	tl = intel_context_timeline_lock(ce);
1035	if (IS_ERR(ptr: tl))
1036	return ERR_CAST(ptr: tl);
1037
1038	/ Move our oldest request to the slab-cache (if not in use!) /
1039	rq = list_first_entry(&tl->requests, typeof(*rq), link);
1040	if (!list_is_last(list: &rq->link, head: &tl->requests))
1041	i915_request_retire(rq);
1042
1043	intel_context_enter(ce);
1044	rq = __i915_request_create(ce, GFP_KERNEL);
1045	intel_context_exit(ce); / active reference transferred to request /
1046	if (IS_ERR(ptr: rq))
1047	goto err_unlock;
1048
1049	/ Check that we do not interrupt ourselves with a new request /
1050	rq->cookie = lockdep_pin_lock(&tl->mutex);
1051
1052	return rq;
1053
1054	err_unlock:
1055	intel_context_timeline_unlock(tl);
1056	return rq;
1057	}
1058
1059	static int
1060	i915_request_await_start(struct i915_request rq, struct* i915_request *signal)
1061	{
1062	struct dma_fence *fence;
1063	int err;
1064
1065	if (i915_request_timeline(rq) == rcu_access_pointer(signal->timeline))
1066	return `0`;
1067
1068	if (i915_request_started(rq: signal))
1069	return `0`;
1070
1071	/*
1072	* The caller holds a reference on @signal, but we do not serialise
1073	* against it being retired and removed from the lists.
1074	*
1075	* We do not hold a reference to the request before @signal, and
1076	* so must be very careful to ensure that it is not _recycled_ as
1077	* we follow the link backwards.
1078	*/
1079	fence = NULL;
1080	rcu_read_lock();
1081	do {
1082	struct list_head *pos = READ_ONCE(signal->link.prev);
1083	struct i915_request *prev;
1084
1085	/ Confirm signal has not been retired, the link is valid /
1086	if (unlikely(__i915_request_has_started(signal)))
1087	break;
1088
1089	/ Is signal the earliest request on its timeline? /
1090	if (pos == &rcu_dereference(signal->timeline)->requests)
1091	break;
1092
1093	/*
1094	* Peek at the request before us in the timeline. That
1095	* request will only be valid before it is retired, so
1096	* after acquiring a reference to it, confirm that it is
1097	* still part of the signaler's timeline.
1098	*/
1099	prev = list_entry(pos, typeof(*prev), link);
1100	if (!i915_request_get_rcu(rq: prev))
1101	break;
1102
1103	/ After the strong barrier, confirm prev is still attached /
1104	if (unlikely(READ_ONCE(prev->link.next) != &signal->link)) {
1105	i915_request_put(rq: prev);
1106	break;
1107	}
1108
1109	fence = &prev->fence;
1110	} while (`0`);
1111	rcu_read_unlock();
1112	if (!fence)
1113	return `0`;
1114
1115	err = `0`;
1116	if (!intel_timeline_sync_is_later(tl: i915_request_timeline(rq), fence))
1117	err = i915_sw_fence_await_dma_fence(fence: &rq->submit,
1118	dma: fence, timeout: `0`,
1119	I915_FENCE_GFP);
1120	dma_fence_put(fence);
1121
1122	return err;
1123	}
1124
1125	static intel_engine_mask_t
1126	already_busywaiting(struct i915_request *rq)
1127	{
1128	/*
1129	* Polling a semaphore causes bus traffic, delaying other users of
1130	* both the GPU and CPU. We want to limit the impact on others,
1131	* while taking advantage of early submission to reduce GPU
1132	* latency. Therefore we restrict ourselves to not using more
1133	* than one semaphore from each source, and not using a semaphore
1134	* if we have detected the engine is saturated (i.e. would not be
1135	* submitted early and cause bus traffic reading an already passed
1136	* semaphore).
1137	*
1138	* See the are-we-too-late? check in __i915_request_submit().
1139	*/
1140	return rq->sched.semaphores \| READ_ONCE(rq->engine->saturated);
1141	}
1142
1143	static int
1144	__emit_semaphore_wait(struct i915_request *to,
1145	struct i915_request *from,
1146	u32 seqno)
1147	{
1148	const int has_token = GRAPHICS_VER(to->engine->i915) >= `12`;
1149	u32 hwsp_offset;
1150	int len, err;
1151	u32 *cs;
1152
1153	GEM_BUG_ON(GRAPHICS_VER(to->engine->i915) < `8`);
1154	GEM_BUG_ON(i915_request_has_initial_breadcrumb(to));
1155
1156	/ We need to pin the signaler's HWSP until we are finished reading. /
1157	err = intel_timeline_read_hwsp(from, until: to, hwsp_offset: &hwsp_offset);
1158	if (err)
1159	return err;
1160
1161	len = `4`;
1162	if (has_token)
1163	len += `2`;
1164
1165	cs = intel_ring_begin(rq: to, num_dwords: len);
1166	if (IS_ERR(ptr: cs))
1167	return PTR_ERR(ptr: cs);
1168
1169	/*
1170	* Using greater-than-or-equal here means we have to worry
1171	* about seqno wraparound. To side step that issue, we swap
1172	* the timeline HWSP upon wrapping, so that everyone listening
1173	* for the old (pre-wrap) values do not see the much smaller
1174	* (post-wrap) values than they were expecting (and so wait
1175	* forever).
1176	*/
1177	*cs++ = (MI_SEMAPHORE_WAIT \|
1178	MI_SEMAPHORE_GLOBAL_GTT \|
1179	MI_SEMAPHORE_POLL \|
1180	MI_SEMAPHORE_SAD_GTE_SDD) +
1181	has_token;
1182	*cs++ = seqno;
1183	*cs++ = hwsp_offset;
1184	*cs++ = `0`;
1185	if (has_token) {
1186	*cs++ = `0`;
1187	*cs++ = MI_NOOP;
1188	}
1189
1190	intel_ring_advance(rq: to, cs);
1191	return `0`;
1192	}
1193
1194	static bool
1195	can_use_semaphore_wait(struct i915_request to, struct* i915_request *from)
1196	{
1197	return to->engine->gt->ggtt == from->engine->gt->ggtt;
1198	}
1199
1200	static int
1201	emit_semaphore_wait(struct i915_request *to,
1202	struct i915_request *from,
1203	gfp_t gfp)
1204	{
1205	const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask;
1206	struct i915_sw_fence *wait = &to->submit;
1207
1208	if (!can_use_semaphore_wait(to, from))
1209	goto await_fence;
1210
1211	if (!intel_context_use_semaphores(ce: to->context))
1212	goto await_fence;
1213
1214	if (i915_request_has_initial_breadcrumb(rq: to))
1215	goto await_fence;
1216
1217	/*
1218	* If this or its dependents are waiting on an external fence
1219	* that may fail catastrophically, then we want to avoid using
1220	* semaphores as they bypass the fence signaling metadata, and we
1221	* lose the fence->error propagation.
1222	*/
1223	if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN)
1224	goto await_fence;
1225
1226	/ Just emit the first semaphore we see as request space is limited. /
1227	if (already_busywaiting(rq: to) & mask)
1228	goto await_fence;
1229
1230	if (i915_request_await_start(rq: to, signal: from) < `0`)
1231	goto await_fence;
1232
1233	/ Only submit our spinner after the signaler is running! /
1234	if (__await_execution(rq: to, signal: from, gfp))
1235	goto await_fence;
1236
1237	if (__emit_semaphore_wait(to, from, seqno: from->fence.seqno))
1238	goto await_fence;
1239
1240	to->sched.semaphores \|= mask;
1241	wait = &to->semaphore;
1242
1243	await_fence:
1244	return i915_sw_fence_await_dma_fence(fence: wait,
1245	dma: &from->fence, timeout: `0`,
1246	I915_FENCE_GFP);
1247	}
1248
1249	static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
1250	struct dma_fence *fence)
1251	{
1252	return __intel_timeline_sync_is_later(tl,
1253	context: fence->context,
1254	seqno: fence->seqno - `1`);
1255	}
1256
1257	static int intel_timeline_sync_set_start(struct intel_timeline *tl,
1258	const struct dma_fence *fence)
1259	{
1260	return __intel_timeline_sync_set(tl, context: fence->context, seqno: fence->seqno - `1`);
1261	}
1262
1263	static int
1264	__i915_request_await_execution(struct i915_request *to,
1265	struct i915_request *from)
1266	{
1267	int err;
1268
1269	GEM_BUG_ON(intel_context_is_barrier(from->context));
1270
1271	/ Submit both requests at the same time /
1272	err = __await_execution(rq: to, signal: from, I915_FENCE_GFP);
1273	if (err)
1274	return err;
1275
1276	/ Squash repeated depenendices to the same timelines /
1277	if (intel_timeline_sync_has_start(tl: i915_request_timeline(rq: to),
1278	fence: &from->fence))
1279	return `0`;
1280
1281	/*
1282	* Wait until the start of this request.
1283	*
1284	* The execution cb fires when we submit the request to HW. But in
1285	* many cases this may be long before the request itself is ready to
1286	* run (consider that we submit 2 requests for the same context, where
1287	* the request of interest is behind an indefinite spinner). So we hook
1288	* up to both to reduce our queues and keep the execution lag minimised
1289	* in the worst case, though we hope that the await_start is elided.
1290	*/
1291	err = i915_request_await_start(rq: to, signal: from);
1292	if (err < `0`)
1293	return err;
1294
1295	/*
1296	* Ensure both start together [after all semaphores in signal]
1297	*
1298	* Now that we are queued to the HW at roughly the same time (thanks
1299	* to the execute cb) and are ready to run at roughly the same time
1300	* (thanks to the await start), our signaler may still be indefinitely
1301	* delayed by waiting on a semaphore from a remote engine. If our
1302	* signaler depends on a semaphore, so indirectly do we, and we do not
1303	* want to start our payload until our signaler also starts theirs.
1304	* So we wait.
1305	*
1306	* However, there is also a second condition for which we need to wait
1307	* for the precise start of the signaler. Consider that the signaler
1308	* was submitted in a chain of requests following another context
1309	* (with just an ordinary intra-engine fence dependency between the
1310	* two). In this case the signaler is queued to HW, but not for
1311	* immediate execution, and so we must wait until it reaches the
1312	* active slot.
1313	*/
1314	if (can_use_semaphore_wait(to, from) &&
1315	intel_engine_has_semaphores(engine: to->engine) &&
1316	!i915_request_has_initial_breadcrumb(rq: to)) {
1317	err = __emit_semaphore_wait(to, from, seqno: from->fence.seqno - `1`);
1318	if (err < `0`)
1319	return err;
1320	}
1321
1322	/ Couple the dependency tree for PI on this exposed to->fence /
1323	if (to->engine->sched_engine->schedule) {
1324	err = i915_sched_node_add_dependency(node: &to->sched,
1325	signal: &from->sched,
1326	I915_DEPENDENCY_WEAK);
1327	if (err < `0`)
1328	return err;
1329	}
1330
1331	return intel_timeline_sync_set_start(tl: i915_request_timeline(rq: to),
1332	fence: &from->fence);
1333	}
1334
1335	static void mark_external(struct i915_request *rq)
1336	{
1337	/*
1338	* The downside of using semaphores is that we lose metadata passing
1339	* along the signaling chain. This is particularly nasty when we
1340	* need to pass along a fatal error such as EFAULT or EDEADLK. For
1341	* fatal errors we want to scrub the request before it is executed,
1342	* which means that we cannot preload the request onto HW and have
1343	* it wait upon a semaphore.
1344	*/
1345	rq->sched.flags \|= I915_SCHED_HAS_EXTERNAL_CHAIN;
1346	}
1347
1348	static int
1349	__i915_request_await_external(struct i915_request rq, struct* dma_fence *fence)
1350	{
1351	mark_external(rq);
1352	return i915_sw_fence_await_dma_fence(fence: &rq->submit, dma: fence,
1353	timeout: i915_fence_context_timeout(i915: rq->i915,
1354	context: fence->context),
1355	I915_FENCE_GFP);
1356	}
1357
1358	static int
1359	i915_request_await_external(struct i915_request rq, struct* dma_fence *fence)
1360	{
1361	struct dma_fence *iter;
1362	int err = `0`;
1363
1364	if (!to_dma_fence_chain(fence))
1365	return __i915_request_await_external(rq, fence);
1366
1367	dma_fence_chain_for_each(iter, fence) {
1368	struct dma_fence_chain *chain = to_dma_fence_chain(fence: iter);
1369
1370	if (!dma_fence_is_i915(fence: chain->fence)) {
1371	err = __i915_request_await_external(rq, fence: iter);
1372	break;
1373	}
1374
1375	err = i915_request_await_dma_fence(rq, fence: chain->fence);
1376	if (err < `0`)
1377	break;
1378	}
1379
1380	dma_fence_put(fence: iter);
1381	return err;
1382	}
1383
1384	static inline bool is_parallel_rq(struct i915_request *rq)
1385	{
1386	return intel_context_is_parallel(ce: rq->context);
1387	}
1388
1389	static inline struct intel_context request_to_parent(struct* i915_request *rq)
1390	{
1391	return intel_context_to_parent(ce: rq->context);
1392	}
1393
1394	static bool is_same_parallel_context(struct i915_request *to,
1395	struct i915_request *from)
1396	{
1397	if (is_parallel_rq(rq: to))
1398	return request_to_parent(rq: to) == request_to_parent(rq: from);
1399
1400	return false;
1401	}
1402
1403	int
1404	i915_request_await_execution(struct i915_request *rq,
1405	struct dma_fence *fence)
1406	{
1407	struct dma_fence **child = &fence;
1408	unsigned int nchild = `1`;
1409	int ret;
1410
1411	if (dma_fence_is_array(fence)) {
1412	struct dma_fence_array *array = to_dma_fence_array(fence);
1413
1414	/ XXX Error for signal-on-any fence arrays /
1415
1416	child = array->fences;
1417	nchild = array->num_fences;
1418	GEM_BUG_ON(!nchild);
1419	}
1420
1421	do {
1422	fence = *child++;
1423	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
1424	continue;
1425
1426	if (fence->context == rq->fence.context)
1427	continue;
1428
1429	/*
1430	* We don't squash repeated fence dependencies here as we
1431	* want to run our callback in all cases.
1432	*/
1433
1434	if (dma_fence_is_i915(fence)) {
1435	if (is_same_parallel_context(to: rq, from: to_request(fence)))
1436	continue;
1437	ret = __i915_request_await_execution(to: rq,
1438	from: to_request(fence));
1439	} else {
1440	ret = i915_request_await_external(rq, fence);
1441	}
1442	if (ret < `0`)
1443	return ret;
1444	} while (--nchild);
1445
1446	return `0`;
1447	}
1448
1449	static int
1450	await_request_submit(struct i915_request to, struct* i915_request *from)
1451	{
1452	/*
1453	* If we are waiting on a virtual engine, then it may be
1454	* constrained to execute on a single engine prior to submission.
1455	* When it is submitted, it will be first submitted to the virtual
1456	* engine and then passed to the physical engine. We cannot allow
1457	* the waiter to be submitted immediately to the physical engine
1458	* as it may then bypass the virtual request.
1459	*/
1460	if (to->engine == READ_ONCE(from->engine))
1461	return i915_sw_fence_await_sw_fence_gfp(fence: &to->submit,
1462	after: &from->submit,
1463	I915_FENCE_GFP);
1464	else
1465	return __i915_request_await_execution(to, from);
1466	}
1467
1468	static int
1469	i915_request_await_request(struct i915_request to, struct* i915_request *from)
1470	{
1471	int ret;
1472
1473	GEM_BUG_ON(to == from);
1474	GEM_BUG_ON(to->timeline == from->timeline);
1475
1476	if (i915_request_completed(rq: from)) {
1477	i915_sw_fence_set_error_once(fence: &to->submit, error: from->fence.error);
1478	return `0`;
1479	}
1480
1481	if (to->engine->sched_engine->schedule) {
1482	ret = i915_sched_node_add_dependency(node: &to->sched,
1483	signal: &from->sched,
1484	I915_DEPENDENCY_EXTERNAL);
1485	if (ret < `0`)
1486	return ret;
1487	}
1488
1489	if (!intel_engine_uses_guc(engine: to->engine) &&
1490	is_power_of_2(n: to->execution_mask \| READ_ONCE(from->execution_mask)))
1491	ret = await_request_submit(to, from);
1492	else
1493	ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
1494	if (ret < `0`)
1495	return ret;
1496
1497	return `0`;
1498	}
1499
1500	int
1501	i915_request_await_dma_fence(struct i915_request rq, struct* dma_fence *fence)
1502	{
1503	struct dma_fence **child = &fence;
1504	unsigned int nchild = `1`;
1505	int ret;
1506
1507	/*
1508	* Note that if the fence-array was created in signal-on-any mode,
1509	* we should not decompose it into its individual fences. However,
1510	* we don't currently store which mode the fence-array is operating
1511	* in. Fortunately, the only user of signal-on-any is private to
1512	* amdgpu and we should not see any incoming fence-array from
1513	* sync-file being in signal-on-any mode.
1514	*/
1515	if (dma_fence_is_array(fence)) {
1516	struct dma_fence_array *array = to_dma_fence_array(fence);
1517
1518	child = array->fences;
1519	nchild = array->num_fences;
1520	GEM_BUG_ON(!nchild);
1521	}
1522
1523	do {
1524	fence = *child++;
1525	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
1526	continue;
1527
1528	/*
1529	* Requests on the same timeline are explicitly ordered, along
1530	* with their dependencies, by i915_request_add() which ensures
1531	* that requests are submitted in-order through each ring.
1532	*/
1533	if (fence->context == rq->fence.context)
1534	continue;
1535
1536	/ Squash repeated waits to the same timelines /
1537	if (fence->context &&
1538	intel_timeline_sync_is_later(tl: i915_request_timeline(rq),
1539	fence))
1540	continue;
1541
1542	if (dma_fence_is_i915(fence)) {
1543	if (is_same_parallel_context(to: rq, from: to_request(fence)))
1544	continue;
1545	ret = i915_request_await_request(to: rq, from: to_request(fence));
1546	} else {
1547	ret = i915_request_await_external(rq, fence);
1548	}
1549	if (ret < `0`)
1550	return ret;
1551
1552	/ Record the latest fence used against each timeline /
1553	if (fence->context)
1554	intel_timeline_sync_set(tl: i915_request_timeline(rq),
1555	fence);
1556	} while (--nchild);
1557
1558	return `0`;
1559	}
1560
1561	/**
1562	* i915_request_await_deps - set this request to (async) wait upon a struct
1563	* i915_deps dma_fence collection
1564	* @rq: request we are wishing to use
1565	* @deps: The struct i915_deps containing the dependencies.
1566	*
1567	* Returns 0 if successful, negative error code on error.
1568	*/
1569	int i915_request_await_deps(struct i915_request rq, const* struct i915_deps *deps)
1570	{
1571	int i, err;
1572
1573	for (i = `0`; i < deps->num_deps; ++i) {
1574	err = i915_request_await_dma_fence(rq, fence: deps->fences[i]);
1575	if (err)
1576	return err;
1577	}
1578
1579	return `0`;
1580	}
1581
1582	/**
1583	* i915_request_await_object - set this request to (async) wait upon a bo
1584	* @to: request we are wishing to use
1585	* @obj: object which may be in use on another ring.
1586	* @write: whether the wait is on behalf of a writer
1587	*
1588	* This code is meant to abstract object synchronization with the GPU.
1589	* Conceptually we serialise writes between engines inside the GPU.
1590	* We only allow one engine to write into a buffer at any time, but
1591	* multiple readers. To ensure each has a coherent view of memory, we must:
1592	*
1593	* - If there is an outstanding write request to the object, the new
1594	* request must wait for it to complete (either CPU or in hw, requests
1595	* on the same ring will be naturally ordered).
1596	*
1597	* - If we are a write request (pending_write_domain is set), the new
1598	* request must wait for outstanding read requests to complete.
1599	*
1600	* Returns 0 if successful, else propagates up the lower layer error.
1601	*/
1602	int
1603	i915_request_await_object(struct i915_request *to,
1604	struct drm_i915_gem_object *obj,
1605	bool write)
1606	{
1607	struct dma_resv_iter cursor;
1608	struct dma_fence *fence;
1609	int ret = `0`;
1610
1611	dma_resv_for_each_fence(&cursor, obj->base.resv,
1612	dma_resv_usage_rw(write), fence) {
1613	ret = i915_request_await_dma_fence(rq: to, fence);
1614	if (ret)
1615	break;
1616	}
1617
1618	return ret;
1619	}
1620
1621	static void i915_request_await_huc(struct i915_request *rq)
1622	{
1623	struct intel_huc *huc = &rq->context->engine->gt->uc.huc;
1624
1625	/ don't stall kernel submissions! /
1626	if (!rcu_access_pointer(rq->context->gem_context))
1627	return;
1628
1629	if (intel_huc_wait_required(huc))
1630	i915_sw_fence_await_sw_fence(fence: &rq->submit,
1631	after: &huc->delayed_load.fence,
1632	wq: &rq->hucq);
1633	}
1634
1635	static struct i915_request *
1636	__i915_request_ensure_parallel_ordering(struct i915_request *rq,
1637	struct intel_timeline *timeline)
1638	{
1639	struct i915_request *prev;
1640
1641	GEM_BUG_ON(!is_parallel_rq(rq));
1642
1643	prev = request_to_parent(rq)->parallel.last_rq;
1644	if (prev) {
1645	if (!__i915_request_is_complete(rq: prev)) {
1646	i915_sw_fence_await_sw_fence(fence: &rq->submit,
1647	after: &prev->submit,
1648	wq: &rq->submitq);
1649
1650	if (rq->engine->sched_engine->schedule)
1651	__i915_sched_node_add_dependency(node: &rq->sched,
1652	signal: &prev->sched,
1653	dep: &rq->dep,
1654	flags: `0`);
1655	}
1656	i915_request_put(rq: prev);
1657	}
1658
1659	request_to_parent(rq)->parallel.last_rq = i915_request_get(rq);
1660
1661	/*
1662	* Users have to put a reference potentially got by
1663	* __i915_active_fence_set() to the returned request
1664	* when no longer needed
1665	*/
1666	return to_request(fence: __i915_active_fence_set(active: &timeline->last_request,
1667	fence: &rq->fence));
1668	}
1669
1670	static struct i915_request *
1671	__i915_request_ensure_ordering(struct i915_request *rq,
1672	struct intel_timeline *timeline)
1673	{
1674	struct i915_request *prev;
1675
1676	GEM_BUG_ON(is_parallel_rq(rq));
1677
1678	prev = to_request(fence: __i915_active_fence_set(active: &timeline->last_request,
1679	fence: &rq->fence));
1680
1681	if (prev && !__i915_request_is_complete(rq: prev)) {
1682	bool uses_guc = intel_engine_uses_guc(engine: rq->engine);
1683	bool pow2 = is_power_of_2(READ_ONCE(prev->engine)->mask \|
1684	rq->engine->mask);
1685	bool same_context = prev->context == rq->context;
1686
1687	/*
1688	* The requests are supposed to be kept in order. However,
1689	* we need to be wary in case the timeline->last_request
1690	* is used as a barrier for external modification to this
1691	* context.
1692	*/
1693	GEM_BUG_ON(same_context &&
1694	i915_seqno_passed(prev->fence.seqno,
1695	rq->fence.seqno));
1696
1697	if ((same_context && uses_guc) \|\| (!uses_guc && pow2))
1698	i915_sw_fence_await_sw_fence(fence: &rq->submit,
1699	after: &prev->submit,
1700	wq: &rq->submitq);
1701	else
1702	__i915_sw_fence_await_dma_fence(fence: &rq->submit,
1703	dma: &prev->fence,
1704	cb: &rq->dmaq);
1705	if (rq->engine->sched_engine->schedule)
1706	__i915_sched_node_add_dependency(node: &rq->sched,
1707	signal: &prev->sched,
1708	dep: &rq->dep,
1709	flags: `0`);
1710	}
1711
1712	/*
1713	* Users have to put the reference to prev potentially got
1714	* by __i915_active_fence_set() when no longer needed
1715	*/
1716	return prev;
1717	}
1718
1719	static struct i915_request *
1720	__i915_request_add_to_timeline(struct i915_request *rq)
1721	{
1722	struct intel_timeline *timeline = i915_request_timeline(rq);
1723	struct i915_request *prev;
1724
1725	/*
1726	* Media workloads may require HuC, so stall them until HuC loading is
1727	* complete. Note that HuC not being loaded when a user submission
1728	* arrives can only happen when HuC is loaded via GSC and in that case
1729	* we still expect the window between us starting to accept submissions
1730	* and HuC loading completion to be small (a few hundred ms).
1731	*/
1732	if (rq->engine->class == VIDEO_DECODE_CLASS)
1733	i915_request_await_huc(rq);
1734
1735	/*
1736	* Dependency tracking and request ordering along the timeline
1737	* is special cased so that we can eliminate redundant ordering
1738	* operations while building the request (we know that the timeline
1739	* itself is ordered, and here we guarantee it).
1740	*
1741	* As we know we will need to emit tracking along the timeline,
1742	* we embed the hooks into our request struct -- at the cost of
1743	* having to have specialised no-allocation interfaces (which will
1744	* be beneficial elsewhere).
1745	*
1746	* A second benefit to open-coding i915_request_await_request is
1747	* that we can apply a slight variant of the rules specialised
1748	* for timelines that jump between engines (such as virtual engines).
1749	* If we consider the case of virtual engine, we must emit a dma-fence
1750	* to prevent scheduling of the second request until the first is
1751	* complete (to maximise our greedy late load balancing) and this
1752	* precludes optimising to use semaphores serialisation of a single
1753	* timeline across engines.
1754	*
1755	* We do not order parallel submission requests on the timeline as each
1756	* parallel submission context has its own timeline and the ordering
1757	* rules for parallel requests are that they must be submitted in the
1758	* order received from the execbuf IOCTL. So rather than using the
1759	* timeline we store a pointer to last request submitted in the
1760	* relationship in the gem context and insert a submission fence
1761	* between that request and request passed into this function or
1762	* alternatively we use completion fence if gem context has a single
1763	* timeline and this is the first submission of an execbuf IOCTL.
1764	*/
1765	if (likely(!is_parallel_rq(rq)))
1766	prev = __i915_request_ensure_ordering(rq, timeline);
1767	else
1768	prev = __i915_request_ensure_parallel_ordering(rq, timeline);
1769	if (prev)
1770	i915_request_put(rq: prev);
1771
1772	/*
1773	* Make sure that no request gazumped us - if it was allocated after
1774	* our i915_request_alloc() and called __i915_request_add() before
1775	* us, the timeline will hold its seqno which is later than ours.
1776	*/
1777	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
1778
1779	return prev;
1780	}
1781
1782	/*
1783	* NB: This function is not allowed to fail. Doing so would mean the the
1784	* request is not being tracked for completion but the work itself is
1785	* going to happen on the hardware. This would be a Bad Thing(tm).
1786	*/
1787	struct i915_request __i915_request_commit(struct* i915_request *rq)
1788	{
1789	struct intel_engine_cs *engine = rq->engine;
1790	struct intel_ring *ring = rq->ring;
1791	u32 *cs;
1792
1793	RQ_TRACE(rq, "\n");
1794
1795	/*
1796	* To ensure that this call will not fail, space for its emissions
1797	* should already have been reserved in the ring buffer. Let the ring
1798	* know that it is time to use that space up.
1799	*/
1800	GEM_BUG_ON(rq->reserved_space > ring->space);
1801	rq->reserved_space = `0`;
1802	rq->emitted_jiffies = jiffies;
1803
1804	/*
1805	* Record the position of the start of the breadcrumb so that
1806	* should we detect the updated seqno part-way through the
1807	* GPU processing the request, we never over-estimate the
1808	* position of the ring's HEAD.
1809	*/
1810	cs = intel_ring_begin(rq, num_dwords: engine->emit_fini_breadcrumb_dw);
1811	GEM_BUG_ON(IS_ERR(cs));
1812	rq->postfix = intel_ring_offset(rq, addr: cs);
1813
1814	return __i915_request_add_to_timeline(rq);
1815	}
1816
1817	void __i915_request_queue_bh(struct i915_request *rq)
1818	{
1819	i915_sw_fence_commit(fence: &rq->semaphore);
1820	i915_sw_fence_commit(fence: &rq->submit);
1821	}
1822
1823	void __i915_request_queue(struct i915_request *rq,
1824	const struct i915_sched_attr *attr)
1825	{
1826	/*
1827	* Let the backend know a new request has arrived that may need
1828	* to adjust the existing execution schedule due to a high priority
1829	* request - i.e. we may want to preempt the current request in order
1830	* to run a high priority dependency chain before we can execute this
1831	* request.
1832	*
1833	* This is called before the request is ready to run so that we can
1834	* decide whether to preempt the entire chain so that it is ready to
1835	* run at the earliest possible convenience.
1836	*/
1837	if (attr && rq->engine->sched_engine->schedule)
1838	rq->engine->sched_engine->schedule(rq, attr);
1839
1840	local_bh_disable();
1841	__i915_request_queue_bh(rq);
1842	local_bh_enable(); / kick tasklets /
1843	}
1844
1845	void i915_request_add(struct i915_request *rq)
1846	{
1847	struct intel_timeline * const tl = i915_request_timeline(rq);
1848	struct i915_sched_attr attr = {};
1849	struct i915_gem_context *ctx;
1850
1851	lockdep_assert_held(&tl->mutex);
1852	lockdep_unpin_lock(&tl->mutex, rq->cookie);
1853
1854	trace_i915_request_add(rq);
1855	__i915_request_commit(rq);
1856
1857	/ XXX placeholder for selftests /
1858	rcu_read_lock();
1859	ctx = rcu_dereference(rq->context->gem_context);
1860	if (ctx)
1861	attr = ctx->sched;
1862	rcu_read_unlock();
1863
1864	__i915_request_queue(rq, attr: &attr);
1865
1866	mutex_unlock(lock: &tl->mutex);
1867	}
1868
/*
 * Sample local_clock() and report, through @cpu, the CPU it was sampled on.
 *
 * Returns the local_clock() value unscaled; as the function name and the
 * timeout_ns arithmetic in __i915_spin_request() indicate, it is treated as
 * nanoseconds.
 */
static unsigned long local_clock_ns(unsigned int *cpu)
{
	unsigned long t;

	/*
	 * The clock value is used as-is: no unit conversion or truncating
	 * shift is applied here, so all comparisons against it are made in
	 * the clock's own units.
	 *
	 * Note that local_clock() is only defined wrt to the current CPU;
	 * the comparisons are no longer valid if we switch CPUs. Instead of
	 * blocking preemption for the entire busywait, we can detect the CPU
	 * switch and use that as indicator of system load and a reason to
	 * stop busywaiting, see busywait_stop().
	 */
	*cpu = get_cpu();
	t = local_clock();
	put_cpu();

	return t;
}
1891
1892	static bool busywait_stop(unsigned long timeout, unsigned int cpu)
1893	{
1894	unsigned int this_cpu;
1895
1896	if (time_after(local_clock_ns(&this_cpu), timeout))
1897	return true;
1898
1899	return this_cpu != cpu;
1900	}
1901
1902	static bool __i915_spin_request(struct i915_request * const rq, int state)
1903	{
1904	unsigned long timeout_ns;
1905	unsigned int cpu;
1906
1907	/*
1908	* Only wait for the request if we know it is likely to complete.
1909	*
1910	* We don't track the timestamps around requests, nor the average
1911	* request length, so we do not have a good indicator that this
1912	* request will complete within the timeout. What we do know is the
1913	* order in which requests are executed by the context and so we can
1914	* tell if the request has been started. If the request is not even
1915	* running yet, it is a fair assumption that it will not complete
1916	* within our relatively short timeout.
1917	*/
1918	if (!i915_request_is_running(rq))
1919	return false;
1920
1921	/*
1922	* When waiting for high frequency requests, e.g. during synchronous
1923	* rendering split between the CPU and GPU, the finite amount of time
1924	* required to set up the irq and wait upon it limits the response
1925	* rate. By busywaiting on the request completion for a short while we
1926	* can service the high frequency waits as quick as possible. However,
1927	* if it is a slow request, we want to sleep as quickly as possible.
1928	* The tradeoff between waiting and sleeping is roughly the time it
1929	* takes to sleep on a request, on the order of a microsecond.
1930	*/
1931
1932	timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns);
1933	timeout_ns += local_clock_ns(cpu: &cpu);
1934	do {
1935	if (dma_fence_is_signaled(fence: &rq->fence))
1936	return true;
1937
1938	if (signal_pending_state(state, current))
1939	break;
1940
1941	if (busywait_stop(timeout: timeout_ns, cpu))
1942	break;
1943
1944	cpu_relax();
1945	} while (!need_resched());
1946
1947	return false;
1948	}
1949
1950	struct request_wait {
1951	struct dma_fence_cb cb;
1952	struct task_struct *tsk;
1953	};
1954
1955	static void request_wait_wake(struct dma_fence fence, struct* dma_fence_cb *cb)
1956	{
1957	struct request_wait wait = container_of(cb, typeof(wait), cb);
1958
1959	wake_up_process(fetch_and_zero(&wait->tsk));
1960	}
1961
1962	/**
1963	* i915_request_wait_timeout - wait until execution of request has finished
1964	* @rq: the request to wait upon
1965	* @flags: how to wait
1966	* @timeout: how long to wait in jiffies
1967	*
1968	* i915_request_wait_timeout() waits for the request to be completed, for a
1969	* maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
1970	* unbounded wait).
1971	*
1972	* Returns the remaining time (in jiffies) if the request completed, which may
1973	* be zero if the request is unfinished after the timeout expires.
1974	* If the timeout is 0, it will return 1 if the fence is signaled.
1975	*
1976	* May return -EINTR is called with I915_WAIT_INTERRUPTIBLE and a signal is
1977	* pending before the request completes.
1978	*
1979	* NOTE: This function has the same wait semantics as dma-fence.
1980	*/
1981	long i915_request_wait_timeout(struct i915_request *rq,
1982	unsigned int flags,
1983	long timeout)
1984	{
1985	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
1986	TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
1987	struct request_wait wait;
1988
1989	might_sleep();
1990	GEM_BUG_ON(timeout < `0`);
1991
1992	if (dma_fence_is_signaled(fence: &rq->fence))
1993	return timeout ?: `1`;
1994
1995	if (!timeout)
1996	return -ETIME;
1997
1998	trace_i915_request_wait_begin(rq, flags);
1999
2000	/*
2001	* We must never wait on the GPU while holding a lock as we
2002	* may need to perform a GPU reset. So while we don't need to
2003	* serialise wait/reset with an explicit lock, we do want
2004	* lockdep to detect potential dependency cycles.
2005	*/
2006	mutex_acquire(&rq->engine->gt->reset.mutex.dep_map, `0`, `0`, _THIS_IP_);
2007
2008	/*
2009	* Optimistic spin before touching IRQs.
2010	*
2011	* We may use a rather large value here to offset the penalty of
2012	* switching away from the active task. Frequently, the client will
2013	* wait upon an old swapbuffer to throttle itself to remain within a
2014	* frame of the gpu. If the client is running in lockstep with the gpu,
2015	* then it should not be waiting long at all, and a sleep now will incur
2016	* extra scheduler latency in producing the next frame. To try to
2017	* avoid adding the cost of enabling/disabling the interrupt to the
2018	* short wait, we first spin to see if the request would have completed
2019	* in the time taken to setup the interrupt.
2020	*
2021	* We need upto 5us to enable the irq, and upto 20us to hide the
2022	* scheduler latency of a context switch, ignoring the secondary
2023	* impacts from a context switch such as cache eviction.
2024	*
2025	* The scheme used for low-latency IO is called "hybrid interrupt
2026	* polling". The suggestion there is to sleep until just before you
2027	* expect to be woken by the device interrupt and then poll for its
2028	* completion. That requires having a good predictor for the request
2029	* duration, which we currently lack.
2030	*/
2031	if (CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT &&
2032	__i915_spin_request(rq, state))
2033	goto out;
2034
2035	/*
2036	* This client is about to stall waiting for the GPU. In many cases
2037	* this is undesirable and limits the throughput of the system, as
2038	* many clients cannot continue processing user input/output whilst
2039	* blocked. RPS autotuning may take tens of milliseconds to respond
2040	* to the GPU load and thus incurs additional latency for the client.
2041	* We can circumvent that by promoting the GPU frequency to maximum
2042	* before we sleep. This makes the GPU throttle up much more quickly
2043	* (good for benchmarks and user experience, e.g. window animations),
2044	* but at a cost of spending more power processing the workload
2045	* (bad for battery).
2046	*/
2047	if (flags & I915_WAIT_PRIORITY && !i915_request_started(rq))
2048	intel_rps_boost(rq);
2049
2050	wait.tsk = current;
2051	if (dma_fence_add_callback(fence: &rq->fence, cb: &wait.cb, func: request_wait_wake))
2052	goto out;
2053
2054	/*
2055	* Flush the submission tasklet, but only if it may help this request.
2056	*
2057	* We sometimes experience some latency between the HW interrupts and
2058	* tasklet execution (mostly due to ksoftirqd latency, but it can also
2059	* be due to lazy CS events), so lets run the tasklet manually if there
2060	* is a chance it may submit this request. If the request is not ready
2061	* to run, as it is waiting for other fences to be signaled, flushing
2062	* the tasklet is busy work without any advantage for this client.
2063	*
2064	* If the HW is being lazy, this is the last chance before we go to
2065	* sleep to catch any pending events. We will check periodically in
2066	* the heartbeat to flush the submission tasklets as a last resort
2067	* for unhappy HW.
2068	*/
2069	if (i915_request_is_ready(rq))
2070	__intel_engine_flush_submission(engine: rq->engine, sync: false);
2071
2072	for (;;) {
2073	set_current_state(state);
2074
2075	if (dma_fence_is_signaled(fence: &rq->fence))
2076	break;
2077
2078	if (signal_pending_state(state, current)) {
2079	timeout = -ERESTARTSYS;
2080	break;
2081	}
2082
2083	if (!timeout) {
2084	timeout = -ETIME;
2085	break;
2086	}
2087
2088	timeout = io_schedule_timeout(timeout);
2089	}
2090	__set_current_state(TASK_RUNNING);
2091
2092	if (READ_ONCE(wait.tsk))
2093	dma_fence_remove_callback(fence: &rq->fence, cb: &wait.cb);
2094	GEM_BUG_ON(!list_empty(&wait.cb.node));
2095
2096	out:
2097	mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);
2098	trace_i915_request_wait_end(rq);
2099	return timeout;
2100	}
2101
/**
 * i915_request_wait - wait until execution of request has finished
 * @rq: the request to wait upon
 * @flags: how to wait
 * @timeout: how long to wait in jiffies
 *
 * i915_request_wait() waits for the request to be completed, for a
 * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
 * unbounded wait).
 *
 * Returns the remaining time (in jiffies) if the request completed, or
 * -ETIME if the request is unfinished after the timeout expires. A zero
 * result from i915_request_wait_timeout() is also reported as -ETIME.
 * May return -ERESTARTSYS if called with I915_WAIT_INTERRUPTIBLE and a signal
 * is pending before the request completes.
 *
 * NOTE: This function behaves differently from dma-fence wait semantics for
 * timeout = 0. It returns 0 on success, and -ETIME if not signaled.
 */
long i915_request_wait(struct i915_request *rq,
		       unsigned int flags,
		       long timeout)
{
	long ret = i915_request_wait_timeout(rq, flags, timeout);

	if (!ret)
		return -ETIME;

	/* A zero-timeout poll that found the request signaled reports 0. */
	if (ret > 0 && !timeout)
		return 0;

	return ret;
}
2134
2135	static int print_sched_attr(const struct i915_sched_attr *attr,
2136	char buf, int* x, int len)
2137	{
2138	if (attr->priority == I915_PRIORITY_INVALID)
2139	return x;
2140
2141	x += snprintf(buf: buf + x, size: len - x,
2142	fmt: " prio=%d", attr->priority);
2143
2144	return x;
2145	}
2146
2147	static char queue_status(const struct i915_request *rq)
2148	{
2149	if (i915_request_is_active(rq))
2150	return `'E'`;
2151
2152	if (i915_request_is_ready(rq))
2153	return intel_engine_is_virtual(engine: rq->engine) ? `'V'` : `'R'`;
2154
2155	return `'U'`;
2156	}
2157
2158	static const char run_status(const* struct i915_request *rq)
2159	{
2160	if (__i915_request_is_complete(rq))
2161	return "!";
2162
2163	if (__i915_request_has_started(rq))
2164	return "*";
2165
2166	if (!i915_sw_fence_signaled(fence: &rq->semaphore))
2167	return "&";
2168
2169	return "";
2170	}
2171
2172	static const char fence_status(const* struct i915_request *rq)
2173	{
2174	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
2175	return "+";
2176
2177	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
2178	return "-";
2179
2180	return "";
2181	}
2182
2183	void i915_request_show(struct drm_printer *m,
2184	const struct i915_request *rq,
2185	const char *prefix,
2186	int indent)
2187	{
2188	const char name = rq->fence.ops->get_timeline_name((struct* dma_fence *)&rq->fence);
2189	char buf[`80`] = "";
2190	int x = `0`;
2191
2192	/*
2193	* The prefix is used to show the queue status, for which we use
2194	* the following flags:
2195	*
2196	* U [Unready]
2197	* - initial status upon being submitted by the user
2198	*
2199	* - the request is not ready for execution as it is waiting
2200	* for external fences
2201	*
2202	* R [Ready]
2203	* - all fences the request was waiting on have been signaled,
2204	* and the request is now ready for execution and will be
2205	* in a backend queue
2206	*
2207	* - a ready request may still need to wait on semaphores
2208	* [internal fences]
2209	*
2210	* V [Ready/virtual]
2211	* - same as ready, but queued over multiple backends
2212	*
2213	* E [Executing]
2214	* - the request has been transferred from the backend queue and
2215	* submitted for execution on HW
2216	*
2217	* - a completed request may still be regarded as executing, its
2218	* status may not be updated until it is retired and removed
2219	* from the lists
2220	*/
2221
2222	x = print_sched_attr(attr: &rq->sched.attr, buf, x, len: sizeof(buf));
2223
2224	drm_printf(p: m, f: "%s%.*s%c %llx:%lld%s%s %s @ %dms: %s\n",
2225	prefix, indent, " ",
2226	queue_status(rq),
2227	rq->fence.context, rq->fence.seqno,
2228	run_status(rq),
2229	fence_status(rq),
2230	buf,
2231	jiffies_to_msecs(j: jiffies - rq->emitted_jiffies),
2232	name);
2233	}
2234
2235	static bool engine_match_ring(struct intel_engine_cs engine, struct* i915_request *rq)
2236	{
2237	u32 ring = ENGINE_READ(engine, RING_START);
2238
2239	return ring == i915_ggtt_offset(vma: rq->ring->vma);
2240	}
2241
2242	static bool match_ring(struct i915_request *rq)
2243	{
2244	struct intel_engine_cs *engine;
2245	bool found;
2246	int i;
2247
2248	if (!intel_engine_is_virtual(engine: rq->engine))
2249	return engine_match_ring(engine: rq->engine, rq);
2250
2251	found = false;
2252	i = `0`;
2253	while ((engine = intel_engine_get_sibling(engine: rq->engine, sibling: i++))) {
2254	found = engine_match_ring(engine, rq);
2255	if (found)
2256	break;
2257	}
2258
2259	return found;
2260	}
2261
2262	enum i915_request_state i915_test_request_state(struct i915_request *rq)
2263	{
2264	if (i915_request_completed(rq))
2265	return I915_REQUEST_COMPLETE;
2266
2267	if (!i915_request_started(rq))
2268	return I915_REQUEST_PENDING;
2269
2270	if (match_ring(rq))
2271	return I915_REQUEST_ACTIVE;
2272
2273	return I915_REQUEST_QUEUED;
2274	}
2275
2276	#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
2277	#include "selftests/mock_request.c"
2278	#include "selftests/i915_request.c"
2279	#endif
2280
2281	void i915_request_module_exit(void)
2282	{
2283	kmem_cache_destroy(s: slab_execute_cbs);
2284	kmem_cache_destroy(s: slab_requests);
2285	}
2286
2287	int __init i915_request_module_init(void)
2288	{
2289	slab_requests =
2290	kmem_cache_create(name: "i915_request",
2291	size: sizeof(struct i915_request),
2292	align: __alignof__(struct i915_request),
2293	SLAB_HWCACHE_ALIGN \|
2294	SLAB_RECLAIM_ACCOUNT \|
2295	SLAB_TYPESAFE_BY_RCU,
2296	ctor: __i915_request_ctor);
2297	if (!slab_requests)
2298	return -ENOMEM;
2299
2300	slab_execute_cbs = KMEM_CACHE(execute_cb,
2301	SLAB_HWCACHE_ALIGN \|
2302	SLAB_RECLAIM_ACCOUNT \|
2303	SLAB_TYPESAFE_BY_RCU);
2304	if (!slab_execute_cbs)
2305	goto err_requests;
2306
2307	return `0`;
2308
2309	err_requests:
2310	kmem_cache_destroy(s: slab_requests);
2311	return -ENOMEM;
2312	}
2313

/* source code of linux/drivers/gpu/drm/i915/i915_request.c */