selftest_timeline.c source code [linux/drivers/gpu/drm/i915/gt/selftest_timeline.c]

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2017-2018 Intel Corporation
 */
5
6	#include <linux/prime_numbers.h>
7	#include <linux/string_helpers.h>
8
9	#include "intel_context.h"
10	#include "intel_engine_heartbeat.h"
11	#include "intel_engine_pm.h"
12	#include "intel_engine_regs.h"
13	#include "intel_gpu_commands.h"
14	#include "intel_gt.h"
15	#include "intel_gt_requests.h"
16	#include "intel_ring.h"
17	#include "selftest_engine_heartbeat.h"
18
19	#include "../selftests/i915_random.h"
20	#include "../i915_selftest.h"
21
22	#include "selftests/igt_flush_test.h"
23	#include "selftests/lib_sw_fence.h"
24	#include "selftests/mock_gem_device.h"
25	#include "selftests/mock_timeline.h"
26
27	static struct page hwsp_page(struct* intel_timeline *tl)
28	{
29	struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;
30
31	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
32	return sg_page(sg: obj->mm.pages->sgl);
33	}
34
35	static unsigned long hwsp_cacheline(struct intel_timeline *tl)
36	{
37	unsigned long address = (unsigned long)page_address(hwsp_page(tl));
38
39	return (address + offset_in_page(tl->hwsp_offset)) / TIMELINE_SEQNO_BYTES;
40	}
41
42	static int selftest_tl_pin(struct intel_timeline *tl)
43	{
44	struct i915_gem_ww_ctx ww;
45	int err;
46
47	i915_gem_ww_ctx_init(ctx: &ww, intr: false);
48	retry:
49	err = i915_gem_object_lock(obj: tl->hwsp_ggtt->obj, ww: &ww);
50	if (!err)
51	err = intel_timeline_pin(tl, ww: &ww);
52
53	if (err == -EDEADLK) {
54	err = i915_gem_ww_ctx_backoff(ctx: &ww);
55	if (!err)
56	goto retry;
57	}
58	i915_gem_ww_ctx_fini(ctx: &ww);
59	return err;
60	}
61
/* Only half of seqno's are usable, see __intel_timeline_get_seqno() */
#define CACHELINES_PER_PAGE (PAGE_SIZE / TIMELINE_SEQNO_BYTES / 2)
64
65	struct mock_hwsp_freelist {
66	struct intel_gt *gt;
67	struct radix_tree_root cachelines;
68	struct intel_timeline **history;
69	unsigned long count, max;
70	struct rnd_state prng;
71	};
72
73	enum {
74	SHUFFLE = BIT(`0`),
75	};
76
77	static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
78	unsigned int idx,
79	struct intel_timeline *tl)
80	{
81	tl = xchg(&state->history[idx], tl);
82	if (tl) {
83	radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
84	intel_timeline_unpin(tl);
85	intel_timeline_put(timeline: tl);
86	}
87	}
88
89	static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
90	unsigned int count,
91	unsigned int flags)
92	{
93	struct intel_timeline *tl;
94	unsigned int idx;
95
96	while (count--) {
97	unsigned long cacheline;
98	int err;
99
100	tl = intel_timeline_create(gt: state->gt);
101	if (IS_ERR(ptr: tl))
102	return PTR_ERR(ptr: tl);
103
104	err = selftest_tl_pin(tl);
105	if (err) {
106	intel_timeline_put(timeline: tl);
107	return err;
108	}
109
110	cacheline = hwsp_cacheline(tl);
111	err = radix_tree_insert(&state->cachelines, index: cacheline, tl);
112	if (err) {
113	if (err == -EEXIST) {
114	pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
115	cacheline);
116	}
117	intel_timeline_unpin(tl);
118	intel_timeline_put(timeline: tl);
119	return err;
120	}
121
122	idx = state->count++ % state->max;
123	__mock_hwsp_record(state, idx, tl);
124	}
125
126	if (flags & SHUFFLE)
127	i915_prandom_shuffle(arr: state->history,
128	elsz: sizeof(*state->history),
129	min(state->count, state->max),
130	state: &state->prng);
131
132	count = i915_prandom_u32_max_state(min(state->count, state->max),
133	state: &state->prng);
134	while (count--) {
135	idx = --state->count % state->max;
136	__mock_hwsp_record(state, idx, NULL);
137	}
138
139	return `0`;
140	}
141
142	static int mock_hwsp_freelist(void *arg)
143	{
144	struct mock_hwsp_freelist state;
145	struct drm_i915_private *i915;
146	const struct {
147	const char *name;
148	unsigned int flags;
149	} phases[] = {
150	{ "linear", `0` },
151	{ "shuffled", SHUFFLE },
152	{ },
153	}, *p;
154	unsigned int na;
155	int err = `0`;
156
157	i915 = mock_gem_device();
158	if (!i915)
159	return -ENOMEM;
160
161	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
162	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);
163
164	state.gt = to_gt(i915);
165
166	/*
167	* Create a bunch of timelines and check that their HWSP do not overlap.
168	* Free some, and try again.
169	*/
170
171	state.max = PAGE_SIZE / sizeof(*state.history);
172	state.count = `0`;
173	state.history = kcalloc(n: state.max, size: sizeof(*state.history), GFP_KERNEL);
174	if (!state.history) {
175	err = -ENOMEM;
176	goto err_put;
177	}
178
179	for (p = phases; p->name; p++) {
180	pr_debug("%s(%s)\n", __func__, p->name);
181	for_each_prime_number_from(na, `1`, `2` * CACHELINES_PER_PAGE) {
182	err = __mock_hwsp_timeline(state: &state, count: na, flags: p->flags);
183	if (err)
184	goto out;
185	}
186	}
187
188	out:
189	for (na = `0`; na < state.max; na++)
190	__mock_hwsp_record(state: &state, idx: na, NULL);
191	kfree(objp: state.history);
192	err_put:
193	mock_destroy_device(i915);
194	return err;
195	}
196
197	struct __igt_sync {
198	const char *name;
199	u32 seqno;
200	bool expected;
201	bool set;
202	};
203
204	static int __igt_sync(struct intel_timeline *tl,
205	u64 ctx,
206	const struct __igt_sync *p,
207	const char *name)
208	{
209	int ret;
210
211	if (__intel_timeline_sync_is_later(tl, context: ctx, seqno: p->seqno) != p->expected) {
212	pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
213	name, p->name, ctx, p->seqno, str_yes_no(p->expected));
214	return -EINVAL;
215	}
216
217	if (p->set) {
218	ret = __intel_timeline_sync_set(tl, context: ctx, seqno: p->seqno);
219	if (ret)
220	return ret;
221	}
222
223	return `0`;
224	}
225
226	static int igt_sync(void *arg)
227	{
228	const struct __igt_sync pass[] = {
229	{ "unset", `0`, false, false },
230	{ "new", `0`, false, true },
231	{ "0a", `0`, true, true },
232	{ "1a", `1`, false, true },
233	{ "1b", `1`, true, true },
234	{ "0b", `0`, true, false },
235	{ "2a", `2`, false, true },
236	{ "4", `4`, false, true },
237	{ "INT_MAX", INT_MAX, false, true },
238	{ "INT_MAX-1", INT_MAX-`1`, true, false },
239	{ "INT_MAX+1", (u32)INT_MAX+`1`, false, true },
240	{ "INT_MAX", INT_MAX, true, false },
241	{ "UINT_MAX", UINT_MAX, false, true },
242	{ "wrap", `0`, false, true },
243	{ "unwrap", UINT_MAX, true, false },
244	{},
245	}, *p;
246	struct intel_timeline tl;
247	int order, offset;
248	int ret = -ENODEV;
249
250	mock_timeline_init(timeline: &tl, context: `0`);
251	for (p = pass; p->name; p++) {
252	for (order = `1`; order < `64`; order++) {
253	for (offset = -`1`; offset <= (order > `1`); offset++) {
254	u64 ctx = BIT_ULL(order) + offset;
255
256	ret = __igt_sync(tl: &tl, ctx, p, name: "1");
257	if (ret)
258	goto out;
259	}
260	}
261	}
262	mock_timeline_fini(timeline: &tl);
263
264	mock_timeline_init(timeline: &tl, context: `0`);
265	for (order = `1`; order < `64`; order++) {
266	for (offset = -`1`; offset <= (order > `1`); offset++) {
267	u64 ctx = BIT_ULL(order) + offset;
268
269	for (p = pass; p->name; p++) {
270	ret = __igt_sync(tl: &tl, ctx, p, name: "2");
271	if (ret)
272	goto out;
273	}
274	}
275	}
276
277	out:
278	mock_timeline_fini(timeline: &tl);
279	return ret;
280	}
281
282	static unsigned int random_engine(struct rnd_state *rnd)
283	{
284	return i915_prandom_u32_max_state(ep_ro: I915_NUM_ENGINES, state: rnd);
285	}
286
287	static int bench_sync(void *arg)
288	{
289	struct rnd_state prng;
290	struct intel_timeline tl;
291	unsigned long end_time, count;
292	u64 prng32_1M;
293	ktime_t kt;
294	int order, last_order;
295
296	mock_timeline_init(timeline: &tl, context: `0`);
297
298	/ Lookups from cache are very fast and so the random number generation*
299	* and the loop itself becomes a significant factor in the per-iteration
300	* timings. We try to compensate the results by measuring the overhead
301	* of the prng and subtract it from the reported results.
302	*/
303	prandom_seed_state(state: &prng, seed: i915_selftest.random_seed);
304	count = `0`;
305	kt = ktime_get();
306	end_time = jiffies + HZ/`10`;
307	do {
308	u32 x;
309
310	/ Make sure the compiler doesn't optimise away the prng call /
311	WRITE_ONCE(x, prandom_u32_state(&prng));
312
313	count++;
314	} while (!time_after(jiffies, end_time));
315	kt = ktime_sub(ktime_get(), kt);
316	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
317	__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
318	prng32_1M = div64_ul(ktime_to_ns(kt) << `20`, count);
319
320	/ Benchmark (only) setting random context ids /
321	prandom_seed_state(state: &prng, seed: i915_selftest.random_seed);
322	count = `0`;
323	kt = ktime_get();
324	end_time = jiffies + HZ/`10`;
325	do {
326	u64 id = i915_prandom_u64_state(rnd: &prng);
327
328	__intel_timeline_sync_set(tl: &tl, context: id, seqno: `0`);
329	count++;
330	} while (!time_after(jiffies, end_time));
331	kt = ktime_sub(ktime_get(), kt);
332	kt = ktime_sub_ns(kt, (count * prng32_1M * `2`) >> `20`);
333	pr_info("%s: %lu random insertions, %lluns/insert\n",
334	__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
335
336	/ Benchmark looking up the exact same context ids as we just set /
337	prandom_seed_state(state: &prng, seed: i915_selftest.random_seed);
338	end_time = count;
339	kt = ktime_get();
340	while (end_time--) {
341	u64 id = i915_prandom_u64_state(rnd: &prng);
342
343	if (!__intel_timeline_sync_is_later(tl: &tl, context: id, seqno: `0`)) {
344	mock_timeline_fini(timeline: &tl);
345	pr_err("Lookup of %llu failed\n", id);
346	return -EINVAL;
347	}
348	}
349	kt = ktime_sub(ktime_get(), kt);
350	kt = ktime_sub_ns(kt, (count * prng32_1M * `2`) >> `20`);
351	pr_info("%s: %lu random lookups, %lluns/lookup\n",
352	__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
353
354	mock_timeline_fini(timeline: &tl);
355	cond_resched();
356
357	mock_timeline_init(timeline: &tl, context: `0`);
358
359	/ Benchmark setting the first N (in order) contexts /
360	count = `0`;
361	kt = ktime_get();
362	end_time = jiffies + HZ/`10`;
363	do {
364	__intel_timeline_sync_set(tl: &tl, context: count++, seqno: `0`);
365	} while (!time_after(jiffies, end_time));
366	kt = ktime_sub(ktime_get(), kt);
367	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
368	__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
369
370	/ Benchmark looking up the exact same context ids as we just set /
371	end_time = count;
372	kt = ktime_get();
373	while (end_time--) {
374	if (!__intel_timeline_sync_is_later(tl: &tl, context: end_time, seqno: `0`)) {
375	pr_err("Lookup of %lu failed\n", end_time);
376	mock_timeline_fini(timeline: &tl);
377	return -EINVAL;
378	}
379	}
380	kt = ktime_sub(ktime_get(), kt);
381	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
382	__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
383
384	mock_timeline_fini(timeline: &tl);
385	cond_resched();
386
387	mock_timeline_init(timeline: &tl, context: `0`);
388
389	/ Benchmark searching for a random context id and maybe changing it /
390	prandom_seed_state(state: &prng, seed: i915_selftest.random_seed);
391	count = `0`;
392	kt = ktime_get();
393	end_time = jiffies + HZ/`10`;
394	do {
395	u32 id = random_engine(rnd: &prng);
396	u32 seqno = prandom_u32_state(state: &prng);
397
398	if (!__intel_timeline_sync_is_later(tl: &tl, context: id, seqno))
399	__intel_timeline_sync_set(tl: &tl, context: id, seqno);
400
401	count++;
402	} while (!time_after(jiffies, end_time));
403	kt = ktime_sub(ktime_get(), kt);
404	kt = ktime_sub_ns(kt, (count * prng32_1M * `2`) >> `20`);
405	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
406	__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
407	mock_timeline_fini(timeline: &tl);
408	cond_resched();
409
410	/ Benchmark searching for a known context id and changing the seqno /
411	for (last_order = `1`, order = `1`; order < `32`;
412	({ int tmp = last_order; last_order = order; order += tmp; })) {
413	unsigned int mask = BIT(order) - `1`;
414
415	mock_timeline_init(timeline: &tl, context: `0`);
416
417	count = `0`;
418	kt = ktime_get();
419	end_time = jiffies + HZ/`10`;
420	do {
421	/ Without assuming too many details of the underlying*
422	* implementation, try to identify its phase-changes
423	* (if any)!
424	*/
425	u64 id = (u64)(count & mask) << order;
426
427	__intel_timeline_sync_is_later(tl: &tl, context: id, seqno: `0`);
428	__intel_timeline_sync_set(tl: &tl, context: id, seqno: `0`);
429
430	count++;
431	} while (!time_after(jiffies, end_time));
432	kt = ktime_sub(ktime_get(), kt);
433	pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
434	__func__, count, order,
435	(long long)div64_ul(ktime_to_ns(kt), count));
436	mock_timeline_fini(timeline: &tl);
437	cond_resched();
438	}
439
440	return `0`;
441	}
442
443	int intel_timeline_mock_selftests(void)
444	{
445	static const struct i915_subtest tests[] = {
446	SUBTEST(mock_hwsp_freelist),
447	SUBTEST(igt_sync),
448	SUBTEST(bench_sync),
449	};
450
451	return i915_subtests(tests, NULL);
452	}
453
454	static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
455	{
456	u32 *cs;
457
458	cs = intel_ring_begin(rq, num_dwords: `4`);
459	if (IS_ERR(ptr: cs))
460	return PTR_ERR(ptr: cs);
461
462	if (GRAPHICS_VER(rq->i915) >= `8`) {
463	*cs++ = MI_STORE_DWORD_IMM_GEN4 \| MI_USE_GGTT;
464	*cs++ = addr;
465	*cs++ = `0`;
466	*cs++ = value;
467	} else if (GRAPHICS_VER(rq->i915) >= `4`) {
468	*cs++ = MI_STORE_DWORD_IMM_GEN4 \| MI_USE_GGTT;
469	*cs++ = `0`;
470	*cs++ = addr;
471	*cs++ = value;
472	} else {
473	*cs++ = MI_STORE_DWORD_IMM \| MI_MEM_VIRTUAL;
474	*cs++ = addr;
475	*cs++ = value;
476	*cs++ = MI_NOOP;
477	}
478
479	intel_ring_advance(rq, cs);
480
481	return `0`;
482	}
483
484	static struct i915_request *
485	checked_tl_write(struct intel_timeline tl, struct* intel_engine_cs *engine, u32 value)
486	{
487	struct i915_request *rq;
488	int err;
489
490	err = selftest_tl_pin(tl);
491	if (err) {
492	rq = ERR_PTR(error: err);
493	goto out;
494	}
495
496	if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) {
497	pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
498	*tl->hwsp_seqno, tl->seqno);
499	intel_timeline_unpin(tl);
500	return ERR_PTR(error: -EINVAL);
501	}
502
503	rq = intel_engine_create_kernel_request(engine);
504	if (IS_ERR(ptr: rq))
505	goto out_unpin;
506
507	i915_request_get(rq);
508
509	err = emit_ggtt_store_dw(rq, addr: tl->hwsp_offset, value);
510	i915_request_add(rq);
511	if (err) {
512	i915_request_put(rq);
513	rq = ERR_PTR(error: err);
514	}
515
516	out_unpin:
517	intel_timeline_unpin(tl);
518	out:
519	if (IS_ERR(ptr: rq))
520	pr_err("Failed to write to timeline!\n");
521	return rq;
522	}
523
524	static int live_hwsp_engine(void *arg)
525	{
526	#define NUM_TIMELINES 4096
527	struct intel_gt *gt = arg;
528	struct intel_timeline **timelines;
529	struct intel_engine_cs *engine;
530	enum intel_engine_id id;
531	unsigned long count, n;
532	int err = `0`;
533
534	/*
535	* Create a bunch of timelines and check we can write
536	* independently to each of their breadcrumb slots.
537	*/
538
539	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
540	size: sizeof(*timelines),
541	GFP_KERNEL);
542	if (!timelines)
543	return -ENOMEM;
544
545	count = `0`;
546	for_each_engine(engine, gt, id) {
547	if (!intel_engine_can_store_dword(engine))
548	continue;
549
550	intel_engine_pm_get(engine);
551
552	for (n = `0`; n < NUM_TIMELINES; n++) {
553	struct intel_timeline *tl;
554	struct i915_request *rq;
555
556	tl = intel_timeline_create(gt);
557	if (IS_ERR(ptr: tl)) {
558	err = PTR_ERR(ptr: tl);
559	break;
560	}
561
562	rq = checked_tl_write(tl, engine, value: count);
563	if (IS_ERR(ptr: rq)) {
564	intel_timeline_put(timeline: tl);
565	err = PTR_ERR(ptr: rq);
566	break;
567	}
568
569	timelines[count++] = tl;
570	i915_request_put(rq);
571	}
572
573	intel_engine_pm_put(engine);
574	if (err)
575	break;
576	}
577
578	if (igt_flush_test(i915: gt->i915))
579	err = -EIO;
580
581	for (n = `0`; n < count; n++) {
582	struct intel_timeline *tl = timelines[n];
583
584	if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
585	GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
586	n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
587	GEM_TRACE_DUMP();
588	err = -EINVAL;
589	}
590	intel_timeline_put(timeline: tl);
591	}
592
593	kvfree(addr: timelines);
594	return err;
595	#undef NUM_TIMELINES
596	}
597
598	static int live_hwsp_alternate(void *arg)
599	{
600	#define NUM_TIMELINES 4096
601	struct intel_gt *gt = arg;
602	struct intel_timeline **timelines;
603	struct intel_engine_cs *engine;
604	enum intel_engine_id id;
605	unsigned long count, n;
606	int err = `0`;
607
608	/*
609	* Create a bunch of timelines and check we can write
610	* independently to each of their breadcrumb slots with adjacent
611	* engines.
612	*/
613
614	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
615	size: sizeof(*timelines),
616	GFP_KERNEL);
617	if (!timelines)
618	return -ENOMEM;
619
620	count = `0`;
621	for (n = `0`; n < NUM_TIMELINES; n++) {
622	for_each_engine(engine, gt, id) {
623	struct intel_timeline *tl;
624	struct i915_request *rq;
625
626	if (!intel_engine_can_store_dword(engine))
627	continue;
628
629	tl = intel_timeline_create(gt);
630	if (IS_ERR(ptr: tl)) {
631	err = PTR_ERR(ptr: tl);
632	goto out;
633	}
634
635	intel_engine_pm_get(engine);
636	rq = checked_tl_write(tl, engine, value: count);
637	intel_engine_pm_put(engine);
638	if (IS_ERR(ptr: rq)) {
639	intel_timeline_put(timeline: tl);
640	err = PTR_ERR(ptr: rq);
641	goto out;
642	}
643
644	timelines[count++] = tl;
645	i915_request_put(rq);
646	}
647	}
648
649	out:
650	if (igt_flush_test(i915: gt->i915))
651	err = -EIO;
652
653	for (n = `0`; n < count; n++) {
654	struct intel_timeline *tl = timelines[n];
655
656	if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
657	GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
658	n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
659	GEM_TRACE_DUMP();
660	err = -EINVAL;
661	}
662	intel_timeline_put(timeline: tl);
663	}
664
665	kvfree(addr: timelines);
666	return err;
667	#undef NUM_TIMELINES
668	}
669
670	static int live_hwsp_wrap(void *arg)
671	{
672	struct intel_gt *gt = arg;
673	struct intel_engine_cs *engine;
674	struct intel_timeline *tl;
675	enum intel_engine_id id;
676	int err = `0`;
677
678	/*
679	* Across a seqno wrap, we need to keep the old cacheline alive for
680	* foreign GPU references.
681	*/
682
683	tl = intel_timeline_create(gt);
684	if (IS_ERR(ptr: tl))
685	return PTR_ERR(ptr: tl);
686
687	if (!tl->has_initial_breadcrumb)
688	goto out_free;
689
690	err = selftest_tl_pin(tl);
691	if (err)
692	goto out_free;
693
694	for_each_engine(engine, gt, id) {
695	const u32 *hwsp_seqno[`2`];
696	struct i915_request *rq;
697	u32 seqno[`2`];
698
699	if (!intel_engine_can_store_dword(engine))
700	continue;
701
702	rq = intel_engine_create_kernel_request(engine);
703	if (IS_ERR(ptr: rq)) {
704	err = PTR_ERR(ptr: rq);
705	goto out;
706	}
707
708	tl->seqno = -`4u`;
709
710	mutex_lock_nested(lock: &tl->mutex, SINGLE_DEPTH_NESTING);
711	err = intel_timeline_get_seqno(tl, rq, seqno: &seqno[`0`]);
712	mutex_unlock(lock: &tl->mutex);
713	if (err) {
714	i915_request_add(rq);
715	goto out;
716	}
717	pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
718	seqno[`0`], tl->hwsp_offset);
719
720	err = emit_ggtt_store_dw(rq, addr: tl->hwsp_offset, value: seqno[`0`]);
721	if (err) {
722	i915_request_add(rq);
723	goto out;
724	}
725	hwsp_seqno[`0`] = tl->hwsp_seqno;
726
727	mutex_lock_nested(lock: &tl->mutex, SINGLE_DEPTH_NESTING);
728	err = intel_timeline_get_seqno(tl, rq, seqno: &seqno[`1`]);
729	mutex_unlock(lock: &tl->mutex);
730	if (err) {
731	i915_request_add(rq);
732	goto out;
733	}
734	pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
735	seqno[`1`], tl->hwsp_offset);
736
737	err = emit_ggtt_store_dw(rq, addr: tl->hwsp_offset, value: seqno[`1`]);
738	if (err) {
739	i915_request_add(rq);
740	goto out;
741	}
742	hwsp_seqno[`1`] = tl->hwsp_seqno;
743
744	/ With wrap should come a new hwsp /
745	GEM_BUG_ON(seqno[`1`] >= seqno[`0`]);
746	GEM_BUG_ON(hwsp_seqno[`0`] == hwsp_seqno[`1`]);
747
748	i915_request_add(rq);
749
750	if (i915_request_wait(rq, flags: `0`, HZ / `5`) < `0`) {
751	pr_err("Wait for timeline writes timed out!\n");
752	err = -EIO;
753	goto out;
754	}
755
756	if (READ_ONCE(*hwsp_seqno[`0`]) != seqno[`0`] \|\|
757	READ_ONCE(*hwsp_seqno[`1`]) != seqno[`1`]) {
758	pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
759	hwsp_seqno[`0`], hwsp_seqno[`1`],
760	seqno[`0`], seqno[`1`]);
761	err = -EINVAL;
762	goto out;
763	}
764
765	intel_gt_retire_requests(gt); / recycle HWSP /
766	}
767
768	out:
769	if (igt_flush_test(i915: gt->i915))
770	err = -EIO;
771
772	intel_timeline_unpin(tl);
773	out_free:
774	intel_timeline_put(timeline: tl);
775	return err;
776	}
777
778	static int emit_read_hwsp(struct i915_request *rq,
779	u32 seqno, u32 hwsp,
780	u32 *addr)
781	{
782	const u32 gpr = i915_mmio_reg_offset(GEN8_RING_CS_GPR(rq->engine->mmio_base, `0`));
783	u32 *cs;
784
785	cs = intel_ring_begin(rq, num_dwords: `12`);
786	if (IS_ERR(ptr: cs))
787	return PTR_ERR(ptr: cs);
788
789	*cs++ = MI_STORE_DWORD_IMM_GEN4 \| MI_USE_GGTT;
790	cs++ = addr;
791	*cs++ = `0`;
792	*cs++ = seqno;
793	*addr += `4`;
794
795	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 \| MI_USE_GGTT;
796	*cs++ = gpr;
797	*cs++ = hwsp;
798	*cs++ = `0`;
799
800	*cs++ = MI_STORE_REGISTER_MEM_GEN8 \| MI_USE_GGTT;
801	*cs++ = gpr;
802	cs++ = addr;
803	*cs++ = `0`;
804	*addr += `4`;
805
806	intel_ring_advance(rq, cs);
807
808	return `0`;
809	}
810
811	struct hwsp_watcher {
812	struct i915_vma *vma;
813	struct i915_request *rq;
814	u32 addr;
815	u32 *map;
816	};
817
818	static bool cmp_lt(u32 a, u32 b)
819	{
820	return a < b;
821	}
822
823	static bool cmp_gte(u32 a, u32 b)
824	{
825	return a >= b;
826	}
827
828	static int setup_watcher(struct hwsp_watcher w, struct* intel_gt *gt,
829	struct intel_timeline *tl)
830	{
831	struct drm_i915_gem_object *obj;
832	struct i915_vma *vma;
833
834	obj = i915_gem_object_create_internal(i915: gt->i915, SZ_2M);
835	if (IS_ERR(ptr: obj))
836	return PTR_ERR(ptr: obj);
837
838	/ keep the same cache settings as timeline /
839	i915_gem_object_set_pat_index(obj, pat_index: tl->hwsp_ggtt->obj->pat_index);
840	w->map = i915_gem_object_pin_map_unlocked(obj,
841	page_unmask_bits(tl->hwsp_ggtt->obj->mm.mapping));
842	if (IS_ERR(ptr: w->map)) {
843	i915_gem_object_put(obj);
844	return PTR_ERR(ptr: w->map);
845	}
846
847	vma = i915_gem_object_ggtt_pin(obj, NULL, size: `0`, alignment: `0`, flags: `0`);
848	if (IS_ERR(ptr: vma)) {
849	i915_gem_object_put(obj);
850	return PTR_ERR(ptr: vma);
851	}
852
853	w->vma = vma;
854	w->addr = i915_ggtt_offset(vma);
855	return `0`;
856	}
857
858	static void switch_tl_lock(struct i915_request from, struct* i915_request *to)
859	{
860	/ some light mutex juggling required; think co-routines /
861
862	if (from) {
863	lockdep_unpin_lock(&from->context->timeline->mutex, from->cookie);
864	mutex_unlock(lock: &from->context->timeline->mutex);
865	}
866
867	if (to) {
868	mutex_lock(&to->context->timeline->mutex);
869	to->cookie = lockdep_pin_lock(&to->context->timeline->mutex);
870	}
871	}
872
873	static int create_watcher(struct hwsp_watcher *w,
874	struct intel_engine_cs *engine,
875	int ringsz)
876	{
877	struct intel_context *ce;
878
879	ce = intel_context_create(engine);
880	if (IS_ERR(ptr: ce))
881	return PTR_ERR(ptr: ce);
882
883	ce->ring_size = ringsz;
884	w->rq = intel_context_create_request(ce);
885	intel_context_put(ce);
886	if (IS_ERR(ptr: w->rq))
887	return PTR_ERR(ptr: w->rq);
888
889	w->addr = i915_ggtt_offset(vma: w->vma);
890
891	switch_tl_lock(from: w->rq, NULL);
892
893	return `0`;
894	}
895
896	static int check_watcher(struct hwsp_watcher w, const* char *name,
897	bool (*op)(u32 hwsp, u32 seqno))
898	{
899	struct i915_request *rq = fetch_and_zero(&w->rq);
900	u32 offset, end;
901	int err;
902
903	GEM_BUG_ON(w->addr - i915_ggtt_offset(w->vma) > w->vma->size);
904
905	i915_request_get(rq);
906	switch_tl_lock(NULL, to: rq);
907	i915_request_add(rq);
908
909	if (i915_request_wait(rq, flags: `0`, HZ) < `0`) {
910	err = -ETIME;
911	goto out;
912	}
913
914	err = `0`;
915	offset = `0`;
916	end = (w->addr - i915_ggtt_offset(vma: w->vma)) / sizeof(*w->map);
917	while (offset < end) {
918	if (!op(w->map[offset + `1`], w->map[offset])) {
919	pr_err("Watcher '%s' found HWSP value %x for seqno %x\n",
920	name, w->map[offset + `1`], w->map[offset]);
921	err = -EINVAL;
922	}
923
924	offset += `2`;
925	}
926
927	out:
928	i915_request_put(rq);
929	return err;
930	}
931
932	static void cleanup_watcher(struct hwsp_watcher *w)
933	{
934	if (w->rq) {
935	switch_tl_lock(NULL, to: w->rq);
936
937	i915_request_add(rq: w->rq);
938	}
939
940	i915_vma_unpin_and_release(p_vma: &w->vma, I915_VMA_RELEASE_MAP);
941	}
942
943	static bool retire_requests(struct intel_timeline *tl)
944	{
945	struct i915_request rq, rn;
946
947	mutex_lock(&tl->mutex);
948	list_for_each_entry_safe(rq, rn, &tl->requests, link)
949	if (!i915_request_retire(rq))
950	break;
951	mutex_unlock(lock: &tl->mutex);
952
953	return !i915_active_fence_isset(active: &tl->last_request);
954	}
955
956	static struct i915_request wrap_timeline(struct* i915_request *rq)
957	{
958	struct intel_context *ce = rq->context;
959	struct intel_timeline *tl = ce->timeline;
960	u32 seqno = rq->fence.seqno;
961
962	while (tl->seqno >= seqno) { / Cause a wrap /
963	i915_request_put(rq);
964	rq = intel_context_create_request(ce);
965	if (IS_ERR(ptr: rq))
966	return rq;
967
968	i915_request_get(rq);
969	i915_request_add(rq);
970	}
971
972	i915_request_put(rq);
973	rq = i915_request_create(ce);
974	if (IS_ERR(ptr: rq))
975	return rq;
976
977	i915_request_get(rq);
978	i915_request_add(rq);
979
980	return rq;
981	}
982
983	static int live_hwsp_read(void *arg)
984	{
985	struct intel_gt *gt = arg;
986	struct hwsp_watcher watcher[`2`] = {};
987	struct intel_engine_cs *engine;
988	struct intel_timeline *tl;
989	enum intel_engine_id id;
990	int err = `0`;
991	int i;
992
993	/*
994	* If we take a reference to the HWSP for reading on the GPU, that
995	* read may be arbitrarily delayed (either by foreign fence or
996	* priority saturation) and a wrap can happen within 30 minutes.
997	* When the GPU read is finally submitted it should be correct,
998	* even across multiple wraps.
999	*/
1000
1001	if (GRAPHICS_VER(gt->i915) < `8`) / CS convenience [SRM/LRM] /
1002	return `0`;
1003
1004	tl = intel_timeline_create(gt);
1005	if (IS_ERR(ptr: tl))
1006	return PTR_ERR(ptr: tl);
1007
1008	if (!tl->has_initial_breadcrumb)
1009	goto out_free;
1010
1011	selftest_tl_pin(tl);
1012
1013	for (i = `0`; i < ARRAY_SIZE(watcher); i++) {
1014	err = setup_watcher(w: &watcher[i], gt, tl);
1015	if (err)
1016	goto out;
1017	}
1018
1019	for_each_engine(engine, gt, id) {
1020	struct intel_context *ce;
1021	unsigned long count = `0`;
1022	IGT_TIMEOUT(end_time);
1023
1024	/ Create a request we can use for remote reading of the HWSP /
1025	err = create_watcher(w: &watcher[`1`], engine, SZ_512K);
1026	if (err)
1027	goto out;
1028
1029	do {
1030	struct i915_sw_fence *submit;
1031	struct i915_request *rq;
1032	u32 hwsp, dummy;
1033
1034	submit = heap_fence_create(GFP_KERNEL);
1035	if (!submit) {
1036	err = -ENOMEM;
1037	goto out;
1038	}
1039
1040	err = create_watcher(w: &watcher[`0`], engine, SZ_4K);
1041	if (err)
1042	goto out;
1043
1044	ce = intel_context_create(engine);
1045	if (IS_ERR(ptr: ce)) {
1046	err = PTR_ERR(ptr: ce);
1047	goto out;
1048	}
1049
1050	ce->timeline = intel_timeline_get(timeline: tl);
1051
1052	/ Ensure timeline is mapped, done during first pin /
1053	err = intel_context_pin(ce);
1054	if (err) {
1055	intel_context_put(ce);
1056	goto out;
1057	}
1058
1059	/*
1060	* Start at a new wrap, and set seqno right before another wrap,
1061	* saving 30 minutes of nops
1062	*/
1063	tl->seqno = -`12u` + `2` * (count & `3`);
1064	__intel_timeline_get_seqno(tl, seqno: &dummy);
1065
1066	rq = i915_request_create(ce);
1067	if (IS_ERR(ptr: rq)) {
1068	err = PTR_ERR(ptr: rq);
1069	intel_context_unpin(ce);
1070	intel_context_put(ce);
1071	goto out;
1072	}
1073
1074	err = i915_sw_fence_await_dma_fence(fence: &rq->submit,
1075	dma: &watcher[`0`].rq->fence, timeout: `0`,
1076	GFP_KERNEL);
1077	if (err < `0`) {
1078	i915_request_add(rq);
1079	intel_context_unpin(ce);
1080	intel_context_put(ce);
1081	goto out;
1082	}
1083
1084	switch_tl_lock(from: rq, to: watcher[`0`].rq);
1085	err = intel_timeline_read_hwsp(from: rq, to: watcher[`0`].rq, hwsp: &hwsp);
1086	if (err == `0`)
1087	err = emit_read_hwsp(rq: watcher[`0`].rq, / before /
1088	seqno: rq->fence.seqno, hwsp,
1089	addr: &watcher[`0`].addr);
1090	switch_tl_lock(from: watcher[`0`].rq, to: rq);
1091	if (err) {
1092	i915_request_add(rq);
1093	intel_context_unpin(ce);
1094	intel_context_put(ce);
1095	goto out;
1096	}
1097
1098	switch_tl_lock(from: rq, to: watcher[`1`].rq);
1099	err = intel_timeline_read_hwsp(from: rq, to: watcher[`1`].rq, hwsp: &hwsp);
1100	if (err == `0`)
1101	err = emit_read_hwsp(rq: watcher[`1`].rq, / after /
1102	seqno: rq->fence.seqno, hwsp,
1103	addr: &watcher[`1`].addr);
1104	switch_tl_lock(from: watcher[`1`].rq, to: rq);
1105	if (err) {
1106	i915_request_add(rq);
1107	intel_context_unpin(ce);
1108	intel_context_put(ce);
1109	goto out;
1110	}
1111
1112	i915_request_get(rq);
1113	i915_request_add(rq);
1114
1115	rq = wrap_timeline(rq);
1116	intel_context_unpin(ce);
1117	intel_context_put(ce);
1118	if (IS_ERR(ptr: rq)) {
1119	err = PTR_ERR(ptr: rq);
1120	goto out;
1121	}
1122
1123	err = i915_sw_fence_await_dma_fence(fence: &watcher[`1`].rq->submit,
1124	dma: &rq->fence, timeout: `0`,
1125	GFP_KERNEL);
1126	if (err < `0`) {
1127	i915_request_put(rq);
1128	goto out;
1129	}
1130
1131	err = check_watcher(w: &watcher[`0`], name: "before", op: cmp_lt);
1132	i915_sw_fence_commit(fence: submit);
1133	heap_fence_put(fence: submit);
1134	if (err) {
1135	i915_request_put(rq);
1136	goto out;
1137	}
1138	count++;
1139
1140	/ Flush the timeline before manually wrapping again /
1141	if (i915_request_wait(rq,
1142	I915_WAIT_INTERRUPTIBLE,
1143	HZ) < `0`) {
1144	err = -ETIME;
1145	i915_request_put(rq);
1146	goto out;
1147	}
1148	retire_requests(tl);
1149	i915_request_put(rq);
1150
1151	/ Single requests are limited to half a ring at most /
1152	if (`8` * watcher[`1`].rq->ring->emit >
1153	`3` * watcher[`1`].rq->ring->size)
1154	break;
1155
1156	} while (!__igt_timeout(timeout: end_time, NULL) &&
1157	count < (PAGE_SIZE / TIMELINE_SEQNO_BYTES - `1`) / `2`);
1158
1159	pr_info("%s: simulated %lu wraps\n", engine->name, count);
1160	err = check_watcher(w: &watcher[`1`], name: "after", op: cmp_gte);
1161	if (err)
1162	goto out;
1163	}
1164
1165	out:
1166	for (i = `0`; i < ARRAY_SIZE(watcher); i++)
1167	cleanup_watcher(w: &watcher[i]);
1168
1169	intel_timeline_unpin(tl);
1170
1171	if (igt_flush_test(i915: gt->i915))
1172	err = -EIO;
1173
1174	out_free:
1175	intel_timeline_put(timeline: tl);
1176	return err;
1177	}
1178
1179	static int live_hwsp_rollover_kernel(void *arg)
1180	{
1181	struct intel_gt *gt = arg;
1182	struct intel_engine_cs *engine;
1183	enum intel_engine_id id;
1184	int err = `0`;
1185
1186	/*
1187	* Run the host for long enough, and even the kernel context will
1188	* see a seqno rollover.
1189	*/
1190
1191	for_each_engine(engine, gt, id) {
1192	struct intel_context *ce = engine->kernel_context;
1193	struct intel_timeline *tl = ce->timeline;
1194	struct i915_request *rq[`3`] = {};
1195	int i;
1196
1197	st_engine_heartbeat_disable(engine);
1198	if (intel_gt_wait_for_idle(gt, HZ / `2`)) {
1199	err = -EIO;
1200	goto out;
1201	}
1202
1203	GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
1204	tl->seqno = -`2u`;
1205	WRITE_ONCE((u32 )tl->hwsp_seqno, tl->seqno);
1206
1207	for (i = `0`; i < ARRAY_SIZE(rq); i++) {
1208	struct i915_request *this;
1209
1210	this = i915_request_create(ce);
1211	if (IS_ERR(ptr: this)) {
1212	err = PTR_ERR(ptr: this);
1213	goto out;
1214	}
1215
1216	pr_debug("%s: create fence.seqnp:%d\n",
1217	engine->name,
1218	lower_32_bits(this->fence.seqno));
1219
1220	GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);
1221
1222	rq[i] = i915_request_get(rq: this);
1223	i915_request_add(rq: this);
1224	}
1225
1226	/ We expected a wrap! /
1227	GEM_BUG_ON(rq[`2`]->fence.seqno > rq[`0`]->fence.seqno);
1228
1229	if (i915_request_wait(rq: rq[`2`], flags: `0`, HZ / `5`) < `0`) {
1230	pr_err("Wait for timeline wrap timed out!\n");
1231	err = -EIO;
1232	goto out;
1233	}
1234
1235	for (i = `0`; i < ARRAY_SIZE(rq); i++) {
1236	if (!i915_request_completed(rq: rq[i])) {
1237	pr_err("Pre-wrap request not completed!\n");
1238	err = -EINVAL;
1239	goto out;
1240	}
1241	}
1242
1243	out:
1244	for (i = `0`; i < ARRAY_SIZE(rq); i++)
1245	i915_request_put(rq: rq[i]);
1246	st_engine_heartbeat_enable(engine);
1247	if (err)
1248	break;
1249	}
1250
1251	if (igt_flush_test(i915: gt->i915))
1252	err = -EIO;
1253
1254	return err;
1255	}
1256
1257	static int live_hwsp_rollover_user(void *arg)
1258	{
1259	struct intel_gt *gt = arg;
1260	struct intel_engine_cs *engine;
1261	enum intel_engine_id id;
1262	int err = `0`;
1263
1264	/*
1265	* Simulate a long running user context, and force the seqno wrap
1266	* on the user's timeline.
1267	*/
1268
1269	for_each_engine(engine, gt, id) {
1270	struct i915_request *rq[`3`] = {};
1271	struct intel_timeline *tl;
1272	struct intel_context *ce;
1273	int i;
1274
1275	ce = intel_context_create(engine);
1276	if (IS_ERR(ptr: ce))
1277	return PTR_ERR(ptr: ce);
1278
1279	err = intel_context_alloc_state(ce);
1280	if (err)
1281	goto out;
1282
1283	tl = ce->timeline;
1284	if (!tl->has_initial_breadcrumb)
1285	goto out;
1286
1287	err = intel_context_pin(ce);
1288	if (err)
1289	goto out;
1290
1291	tl->seqno = -`4u`;
1292	WRITE_ONCE((u32 )tl->hwsp_seqno, tl->seqno);
1293
1294	for (i = `0`; i < ARRAY_SIZE(rq); i++) {
1295	struct i915_request *this;
1296
1297	this = intel_context_create_request(ce);
1298	if (IS_ERR(ptr: this)) {
1299	err = PTR_ERR(ptr: this);
1300	goto out_unpin;
1301	}
1302
1303	pr_debug("%s: create fence.seqnp:%d\n",
1304	engine->name,
1305	lower_32_bits(this->fence.seqno));
1306
1307	GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);
1308
1309	rq[i] = i915_request_get(rq: this);
1310	i915_request_add(rq: this);
1311	}
1312
1313	/ We expected a wrap! /
1314	GEM_BUG_ON(rq[`2`]->fence.seqno > rq[`0`]->fence.seqno);
1315
1316	if (i915_request_wait(rq: rq[`2`], flags: `0`, HZ / `5`) < `0`) {
1317	pr_err("Wait for timeline wrap timed out!\n");
1318	err = -EIO;
1319	goto out_unpin;
1320	}
1321
1322	for (i = `0`; i < ARRAY_SIZE(rq); i++) {
1323	if (!i915_request_completed(rq: rq[i])) {
1324	pr_err("Pre-wrap request not completed!\n");
1325	err = -EINVAL;
1326	goto out_unpin;
1327	}
1328	}
1329	out_unpin:
1330	intel_context_unpin(ce);
1331	out:
1332	for (i = `0`; i < ARRAY_SIZE(rq); i++)
1333	i915_request_put(rq: rq[i]);
1334	intel_context_put(ce);
1335	if (err)
1336	break;
1337	}
1338
1339	if (igt_flush_test(i915: gt->i915))
1340	err = -EIO;
1341
1342	return err;
1343	}
1344
1345	static int live_hwsp_recycle(void *arg)
1346	{
1347	struct intel_gt *gt = arg;
1348	struct intel_engine_cs *engine;
1349	enum intel_engine_id id;
1350	unsigned long count;
1351	int err = `0`;
1352
1353	/*
1354	* Check seqno writes into one timeline at a time. We expect to
1355	* recycle the breadcrumb slot between iterations and neither
1356	* want to confuse ourselves or the GPU.
1357	*/
1358
1359	count = `0`;
1360	for_each_engine(engine, gt, id) {
1361	IGT_TIMEOUT(end_time);
1362
1363	if (!intel_engine_can_store_dword(engine))
1364	continue;
1365
1366	intel_engine_pm_get(engine);
1367
1368	do {
1369	struct intel_timeline *tl;
1370	struct i915_request *rq;
1371
1372	tl = intel_timeline_create(gt);
1373	if (IS_ERR(ptr: tl)) {
1374	err = PTR_ERR(ptr: tl);
1375	break;
1376	}
1377
1378	rq = checked_tl_write(tl, engine, value: count);
1379	if (IS_ERR(ptr: rq)) {
1380	intel_timeline_put(timeline: tl);
1381	err = PTR_ERR(ptr: rq);
1382	break;
1383	}
1384
1385	if (i915_request_wait(rq, flags: `0`, HZ / `5`) < `0`) {
1386	pr_err("Wait for timeline writes timed out!\n");
1387	i915_request_put(rq);
1388	intel_timeline_put(timeline: tl);
1389	err = -EIO;
1390	break;
1391	}
1392
1393	if (READ_ONCE(*tl->hwsp_seqno) != count) {
1394	GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x found 0x%x\n",
1395	count, tl->fence_context,
1396	tl->hwsp_offset, *tl->hwsp_seqno);
1397	GEM_TRACE_DUMP();
1398	err = -EINVAL;
1399	}
1400
1401	i915_request_put(rq);
1402	intel_timeline_put(timeline: tl);
1403	count++;
1404
1405	if (err)
1406	break;
1407	} while (!__igt_timeout(timeout: end_time, NULL));
1408
1409	intel_engine_pm_put(engine);
1410	if (err)
1411	break;
1412	}
1413
1414	return err;
1415	}
1416
1417	int intel_timeline_live_selftests(struct drm_i915_private *i915)
1418	{
1419	static const struct i915_subtest tests[] = {
1420	SUBTEST(live_hwsp_recycle),
1421	SUBTEST(live_hwsp_engine),
1422	SUBTEST(live_hwsp_alternate),
1423	SUBTEST(live_hwsp_wrap),
1424	SUBTEST(live_hwsp_read),
1425	SUBTEST(live_hwsp_rollover_kernel),
1426	SUBTEST(live_hwsp_rollover_user),
1427	};
1428
1429	if (intel_gt_is_wedged(gt: to_gt(i915)))
1430	return `0`;
1431
1432	return intel_gt_live_subtests(tests, to_gt(i915));
1433	}
1434

/* source code of linux/drivers/gpu/drm/i915/gt/selftest_timeline.c */