intel_timeline.c source code [linux/drivers/gpu/drm/i915/gt/intel_timeline.c]

1	// SPDX-License-Identifier: MIT
2	/*
3	* Copyright © 2016-2018 Intel Corporation
4	*/
5
6	#include <drm/drm_cache.h>
7
8	#include "gem/i915_gem_internal.h"
9
10	#include "i915_active.h"
11	#include "i915_drv.h"
12	#include "i915_syncmap.h"
13	#include "intel_gt.h"
14	#include "intel_ring.h"
15	#include "intel_timeline.h"
16
/* Bytes occupied by one timeline seqno slot inside the HWSP page. */
#define TIMELINE_SEQNO_BYTES 8
18
19	static struct i915_vma hwsp_alloc(struct* intel_gt *gt)
20	{
21	struct drm_i915_private *i915 = gt->i915;
22	struct drm_i915_gem_object *obj;
23	struct i915_vma *vma;
24
25	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
26	if (IS_ERR(ptr: obj))
27	return ERR_CAST(ptr: obj);
28
29	i915_gem_object_set_cache_coherency(obj, cache_level: I915_CACHE_LLC);
30
31	vma = i915_vma_instance(obj, vm: &gt->ggtt->vm, NULL);
32	if (IS_ERR(ptr: vma))
33	i915_gem_object_put(obj);
34
35	return vma;
36	}
37
38	static void __timeline_retire(struct i915_active *active)
39	{
40	struct intel_timeline *tl =
41	container_of(active, typeof(*tl), active);
42
43	i915_vma_unpin(vma: tl->hwsp_ggtt);
44	intel_timeline_put(timeline: tl);
45	}
46
47	static int __timeline_active(struct i915_active *active)
48	{
49	struct intel_timeline *tl =
50	container_of(active, typeof(*tl), active);
51
52	__i915_vma_pin(vma: tl->hwsp_ggtt);
53	intel_timeline_get(timeline: tl);
54	return `0`;
55	}
56
57	I915_SELFTEST_EXPORT int
58	intel_timeline_pin_map(struct intel_timeline *timeline)
59	{
60	struct drm_i915_gem_object *obj = timeline->hwsp_ggtt->obj;
61	u32 ofs = offset_in_page(timeline->hwsp_offset);
62	void *vaddr;
63
64	vaddr = i915_gem_object_pin_map(obj, type: I915_MAP_WB);
65	if (IS_ERR(ptr: vaddr))
66	return PTR_ERR(ptr: vaddr);
67
68	timeline->hwsp_map = vaddr;
69	timeline->hwsp_seqno = memset(vaddr + ofs, `0`, TIMELINE_SEQNO_BYTES);
70	drm_clflush_virt_range(addr: vaddr + ofs, TIMELINE_SEQNO_BYTES);
71
72	return `0`;
73	}
74
75	static int intel_timeline_init(struct intel_timeline *timeline,
76	struct intel_gt *gt,
77	struct i915_vma *hwsp,
78	unsigned int offset)
79	{
80	kref_init(kref: &timeline->kref);
81	atomic_set(v: &timeline->pin_count, i: `0`);
82
83	timeline->gt = gt;
84
85	if (hwsp) {
86	timeline->hwsp_offset = offset;
87	timeline->hwsp_ggtt = i915_vma_get(vma: hwsp);
88	} else {
89	timeline->has_initial_breadcrumb = true;
90	hwsp = hwsp_alloc(gt);
91	if (IS_ERR(ptr: hwsp))
92	return PTR_ERR(ptr: hwsp);
93	timeline->hwsp_ggtt = hwsp;
94	}
95
96	timeline->hwsp_map = NULL;
97	timeline->hwsp_seqno = (void )(long*)timeline->hwsp_offset;
98
99	GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);
100
101	timeline->fence_context = dma_fence_context_alloc(num: `1`);
102
103	mutex_init(&timeline->mutex);
104
105	INIT_ACTIVE_FENCE(&timeline->last_request);
106	INIT_LIST_HEAD(list: &timeline->requests);
107
108	i915_syncmap_init(root: &timeline->sync);
109	i915_active_init(&timeline->active, __timeline_active,
110	__timeline_retire, `0`);
111
112	return `0`;
113	}
114
115	void intel_gt_init_timelines(struct intel_gt *gt)
116	{
117	struct intel_gt_timelines *timelines = &gt->timelines;
118
119	spin_lock_init(&timelines->lock);
120	INIT_LIST_HEAD(list: &timelines->active_list);
121	}
122
123	static void intel_timeline_fini(struct rcu_head *rcu)
124	{
125	struct intel_timeline *timeline =
126	container_of(rcu, struct intel_timeline, rcu);
127
128	if (timeline->hwsp_map)
129	i915_gem_object_unpin_map(obj: timeline->hwsp_ggtt->obj);
130
131	i915_vma_put(vma: timeline->hwsp_ggtt);
132	i915_active_fini(ref: &timeline->active);
133
134	/*
135	* A small race exists between intel_gt_retire_requests_timeout and
136	* intel_timeline_exit which could result in the syncmap not getting
137	* free'd. Rather than work to hard to seal this race, simply cleanup
138	* the syncmap on fini.
139	*/
140	i915_syncmap_free(root: &timeline->sync);
141
142	kfree(objp: timeline);
143	}
144
145	struct intel_timeline *
146	__intel_timeline_create(struct intel_gt *gt,
147	struct i915_vma *global_hwsp,
148	unsigned int offset)
149	{
150	struct intel_timeline *timeline;
151	int err;
152
153	timeline = kzalloc(size: sizeof(*timeline), GFP_KERNEL);
154	if (!timeline)
155	return ERR_PTR(error: -ENOMEM);
156
157	err = intel_timeline_init(timeline, gt, hwsp: global_hwsp, offset);
158	if (err) {
159	kfree(objp: timeline);
160	return ERR_PTR(error: err);
161	}
162
163	return timeline;
164	}
165
166	struct intel_timeline *
167	intel_timeline_create_from_engine(struct intel_engine_cs *engine,
168	unsigned int offset)
169	{
170	struct i915_vma *hwsp = engine->status_page.vma;
171	struct intel_timeline *tl;
172
173	tl = __intel_timeline_create(gt: engine->gt, global_hwsp: hwsp, offset);
174	if (IS_ERR(ptr: tl))
175	return tl;
176
177	/ Borrow a nearby lock; we only create these timelines during init /
178	mutex_lock(&hwsp->vm->mutex);
179	list_add_tail(new: &tl->engine_link, head: &engine->status_page.timelines);
180	mutex_unlock(lock: &hwsp->vm->mutex);
181
182	return tl;
183	}
184
185	void __intel_timeline_pin(struct intel_timeline *tl)
186	{
187	GEM_BUG_ON(!atomic_read(&tl->pin_count));
188	atomic_inc(v: &tl->pin_count);
189	}
190
191	int intel_timeline_pin(struct intel_timeline tl, struct* i915_gem_ww_ctx *ww)
192	{
193	int err;
194
195	if (atomic_add_unless(v: &tl->pin_count, a: `1`, u: `0`))
196	return `0`;
197
198	if (!tl->hwsp_map) {
199	err = intel_timeline_pin_map(timeline: tl);
200	if (err)
201	return err;
202	}
203
204	err = i915_ggtt_pin(vma: tl->hwsp_ggtt, ww, align: `0`, PIN_HIGH);
205	if (err)
206	return err;
207
208	tl->hwsp_offset =
209	i915_ggtt_offset(vma: tl->hwsp_ggtt) +
210	offset_in_page(tl->hwsp_offset);
211	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
212	tl->fence_context, tl->hwsp_offset);
213
214	i915_active_acquire(ref: &tl->active);
215	if (atomic_fetch_inc(v: &tl->pin_count)) {
216	i915_active_release(ref: &tl->active);
217	__i915_vma_unpin(vma: tl->hwsp_ggtt);
218	}
219
220	return `0`;
221	}
222
223	void intel_timeline_reset_seqno(const struct intel_timeline *tl)
224	{
225	u32 hwsp_seqno = (u32 )tl->hwsp_seqno;
226	/ Must be pinned to be writable, and no requests in flight. /
227	GEM_BUG_ON(!atomic_read(&tl->pin_count));
228
229	memset(hwsp_seqno + `1`, `0`, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno));
230	WRITE_ONCE(*hwsp_seqno, tl->seqno);
231	drm_clflush_virt_range(addr: hwsp_seqno, TIMELINE_SEQNO_BYTES);
232	}
233
234	void intel_timeline_enter(struct intel_timeline *tl)
235	{
236	struct intel_gt_timelines *timelines = &tl->gt->timelines;
237
238	/*
239	* Pretend we are serialised by the timeline->mutex.
240	*
241	* While generally true, there are a few exceptions to the rule
242	* for the engine->kernel_context being used to manage power
243	* transitions. As the engine_park may be called from under any
244	* timeline, it uses the power mutex as a global serialisation
245	* lock to prevent any other request entering its timeline.
246	*
247	* The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
248	*
249	* However, intel_gt_retire_request() does not know which engine
250	* it is retiring along and so cannot partake in the engine-pm
251	* barrier, and there we use the tl->active_count as a means to
252	* pin the timeline in the active_list while the locks are dropped.
253	* Ergo, as that is outside of the engine-pm barrier, we need to
254	* use atomic to manipulate tl->active_count.
255	*/
256	lockdep_assert_held(&tl->mutex);
257
258	if (atomic_add_unless(v: &tl->active_count, a: `1`, u: `0`))
259	return;
260
261	spin_lock(lock: &timelines->lock);
262	if (!atomic_fetch_inc(v: &tl->active_count)) {
263	/*
264	* The HWSP is volatile, and may have been lost while inactive,
265	* e.g. across suspend/resume. Be paranoid, and ensure that
266	* the HWSP value matches our seqno so we don't proclaim
267	* the next request as already complete.
268	*/
269	intel_timeline_reset_seqno(tl);
270	list_add_tail(new: &tl->link, head: &timelines->active_list);
271	}
272	spin_unlock(lock: &timelines->lock);
273	}
274
275	void intel_timeline_exit(struct intel_timeline *tl)
276	{
277	struct intel_gt_timelines *timelines = &tl->gt->timelines;
278
279	/ See intel_timeline_enter() /
280	lockdep_assert_held(&tl->mutex);
281
282	GEM_BUG_ON(!atomic_read(&tl->active_count));
283	if (atomic_add_unless(v: &tl->active_count, a: -`1`, u: `1`))
284	return;
285
286	spin_lock(lock: &timelines->lock);
287	if (atomic_dec_and_test(v: &tl->active_count))
288	list_del(entry: &tl->link);
289	spin_unlock(lock: &timelines->lock);
290
291	/*
292	* Since this timeline is idle, all bariers upon which we were waiting
293	* must also be complete and so we can discard the last used barriers
294	* without loss of information.
295	*/
296	i915_syncmap_free(root: &tl->sync);
297	}
298
299	static u32 timeline_advance(struct intel_timeline *tl)
300	{
301	GEM_BUG_ON(!atomic_read(&tl->pin_count));
302	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
303
304	return tl->seqno += `1` + tl->has_initial_breadcrumb;
305	}
306
307	static noinline int
308	__intel_timeline_get_seqno(struct intel_timeline *tl,
309	u32 *seqno)
310	{
311	u32 next_ofs = offset_in_page(tl->hwsp_offset + TIMELINE_SEQNO_BYTES);
312
313	/ w/a: bit 5 needs to be zero for MI_FLUSH_DW address. /
314	if (TIMELINE_SEQNO_BYTES <= BIT(`5`) && (next_ofs & BIT(`5`)))
315	next_ofs = offset_in_page(next_ofs + BIT(`5`));
316
317	tl->hwsp_offset = i915_ggtt_offset(vma: tl->hwsp_ggtt) + next_ofs;
318	tl->hwsp_seqno = tl->hwsp_map + next_ofs;
319	intel_timeline_reset_seqno(tl);
320
321	*seqno = timeline_advance(tl);
322	GEM_BUG_ON(i915_seqno_passed(tl->hwsp_seqno, seqno));
323	return `0`;
324	}
325
326	int intel_timeline_get_seqno(struct intel_timeline *tl,
327	struct i915_request *rq,
328	u32 *seqno)
329	{
330	*seqno = timeline_advance(tl);
331
332	/ Replace the HWSP on wraparound for HW semaphores /
333	if (unlikely(!*seqno && tl->has_initial_breadcrumb))
334	return __intel_timeline_get_seqno(tl, seqno);
335
336	return `0`;
337	}
338
339	int intel_timeline_read_hwsp(struct i915_request *from,
340	struct i915_request *to,
341	u32 *hwsp)
342	{
343	struct intel_timeline *tl;
344	int err;
345
346	rcu_read_lock();
347	tl = rcu_dereference(from->timeline);
348	if (i915_request_signaled(rq: from) \|\|
349	!i915_active_acquire_if_busy(ref: &tl->active))
350	tl = NULL;
351
352	if (tl) {
353	/ hwsp_offset may wraparound, so use from->hwsp_seqno /
354	*hwsp = i915_ggtt_offset(vma: tl->hwsp_ggtt) +
355	offset_in_page(from->hwsp_seqno);
356	}
357
358	/ ensure we wait on the right request, if not, we completed /
359	if (tl && __i915_request_is_complete(rq: from)) {
360	i915_active_release(ref: &tl->active);
361	tl = NULL;
362	}
363	rcu_read_unlock();
364
365	if (!tl)
366	return `1`;
367
368	/ Can't do semaphore waits on kernel context /
369	if (!tl->has_initial_breadcrumb) {
370	err = -EINVAL;
371	goto out;
372	}
373
374	err = i915_active_add_request(ref: &tl->active, rq: to);
375
376	out:
377	i915_active_release(ref: &tl->active);
378	return err;
379	}
380
381	void intel_timeline_unpin(struct intel_timeline *tl)
382	{
383	GEM_BUG_ON(!atomic_read(&tl->pin_count));
384	if (!atomic_dec_and_test(v: &tl->pin_count))
385	return;
386
387	i915_active_release(ref: &tl->active);
388	__i915_vma_unpin(vma: tl->hwsp_ggtt);
389	}
390
391	void __intel_timeline_free(struct kref *kref)
392	{
393	struct intel_timeline *timeline =
394	container_of(kref, typeof(*timeline), kref);
395
396	GEM_BUG_ON(atomic_read(&timeline->pin_count));
397	GEM_BUG_ON(!list_empty(&timeline->requests));
398	GEM_BUG_ON(timeline->retire);
399
400	call_rcu(head: &timeline->rcu, func: intel_timeline_fini);
401	}
402
403	void intel_gt_fini_timelines(struct intel_gt *gt)
404	{
405	struct intel_gt_timelines *timelines = &gt->timelines;
406
407	GEM_BUG_ON(!list_empty(&timelines->active_list));
408	}
409
410	void intel_gt_show_timelines(struct intel_gt *gt,
411	struct drm_printer *m,
412	void (show_request)(struct* drm_printer *m,
413	const struct i915_request *rq,
414	const char *prefix,
415	int indent))
416	{
417	struct intel_gt_timelines *timelines = &gt->timelines;
418	struct intel_timeline tl, tn;
419	LIST_HEAD(free);
420
421	spin_lock(lock: &timelines->lock);
422	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
423	unsigned long count, ready, inflight;
424	struct i915_request rq, rn;
425	struct dma_fence *fence;
426
427	if (!mutex_trylock(lock: &tl->mutex)) {
428	drm_printf(p: m, f: "Timeline %llx: busy; skipping\n",
429	tl->fence_context);
430	continue;
431	}
432
433	intel_timeline_get(timeline: tl);
434	GEM_BUG_ON(!atomic_read(&tl->active_count));
435	atomic_inc(v: &tl->active_count); / pin the list element /
436	spin_unlock(lock: &timelines->lock);
437
438	count = `0`;
439	ready = `0`;
440	inflight = `0`;
441	list_for_each_entry_safe(rq, rn, &tl->requests, link) {
442	if (i915_request_completed(rq))
443	continue;
444
445	count++;
446	if (i915_request_is_ready(rq))
447	ready++;
448	if (i915_request_is_active(rq))
449	inflight++;
450	}
451
452	drm_printf(p: m, f: "Timeline %llx: { ", tl->fence_context);
453	drm_printf(p: m, f: "count: %lu, ready: %lu, inflight: %lu",
454	count, ready, inflight);
455	drm_printf(p: m, f: ", seqno: { current: %d, last: %d }",
456	*tl->hwsp_seqno, tl->seqno);
457	fence = i915_active_fence_get(active: &tl->last_request);
458	if (fence) {
459	drm_printf(p: m, f: ", engine: %s",
460	to_request(fence)->engine->name);
461	dma_fence_put(fence);
462	}
463	drm_printf(p: m, f: " }\n");
464
465	if (show_request) {
466	list_for_each_entry_safe(rq, rn, &tl->requests, link)
467	show_request(m, rq, "", `2`);
468	}
469
470	mutex_unlock(lock: &tl->mutex);
471	spin_lock(lock: &timelines->lock);
472
473	/ Resume list iteration after reacquiring spinlock /
474	list_safe_reset_next(tl, tn, link);
475	if (atomic_dec_and_test(v: &tl->active_count))
476	list_del(entry: &tl->link);
477
478	/ Defer the final release to after the spinlock /
479	if (refcount_dec_and_test(r: &tl->kref.refcount)) {
480	GEM_BUG_ON(atomic_read(&tl->active_count));
481	list_add(new: &tl->link, head: &free);
482	}
483	}
484	spin_unlock(lock: &timelines->lock);
485
486	list_for_each_entry_safe(tl, tn, &free, link)
487	__intel_timeline_free(kref: &tl->kref);
488	}
489
490	#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
491	#include "gt/selftests/mock_timeline.c"
492	#include "gt/selftest_timeline.c"
493	#endif
494

source code of linux/drivers/gpu/drm/i915/gt/intel_timeline.c