// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright © 2018 Intel Corporation
 */

#include <linux/sort.h>

#include "gt/intel_gt_print.h"
#include "i915_selftest.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "selftest_engine.h"
#include "selftest_engine_heartbeat.h"
#include "selftests/igt_atomic.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"

#define COUNT 5

static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

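/*
 * Drop the smallest and largest of the five samples and return a weighted
 * average of the remaining three, biased towards the median.
 */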
static u64 trifilter(u64 *a)
{
	sort(a, COUNT, sizeof(*a), cmp_u64, NULL);
	return (a[1] + 2 * a[2] + a[3]) >> 2;
}

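/* Emit a MI_SEMAPHORE_WAIT polling a dword in the global GTT */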
static u32 *emit_wait(u32 *cs, u32 offset, int op, u32 value)
{
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		op;
	*cs++ = value;
	*cs++ = offset;
	*cs++ = 0;

	return cs;
}

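/* Emit a MI_STORE_DWORD_IMM writing an immediate value to a GGTT offset */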
static u32 *emit_store(u32 *cs, u32 offset, u32 value)
{
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = offset;
	*cs++ = 0;
	*cs++ = value;

	return cs;
}

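/* Emit a MI_STORE_REGISTER_MEM copying an mmio register into the GGTT */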
static u32 *emit_srm(u32 *cs, i915_reg_t reg, u32 offset)
{
	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(reg);
	*cs++ = offset;
	*cs++ = 0;

	return cs;
}

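/* CPU-side semaphore update, flushed so that the GPU's poll sees it promptly */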
static void write_semaphore(u32 *x, u32 value)
{
	WRITE_ONCE(*x, value);
	wmb();
}

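/*
 * Sample RING_TIMESTAMP and CTX_TIMESTAMP around a ~100us busy-wait driven
 * from the CPU: *dt is the walltime elapsed, *d_ring and *d_ctx the deltas
 * observed by each GPU counter over the same interval.
 */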
static int __measure_timestamps(struct intel_context *ce,
				u64 *dt, u64 *d_ring, u64 *d_ctx)
{
	struct intel_engine_cs *engine = ce->engine;
	u32 *sema = memset32(engine->status_page.addr + 1000, 0, 5);
	u32 offset = i915_ggtt_offset(engine->status_page.vma);
	struct i915_request *rq;
	u32 *cs;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 28);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

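	/*
	 * sema[] maps to dword index 1000 of the status page, i.e. byte
	 * offset 4000 in the GGTT, so sema[0..4] are the dwords written
	 * at offset + 4000..4016 below.
	 */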
	/* Signal & wait for start */
	cs = emit_store(cs, offset + 4008, 1);
	cs = emit_wait(cs, offset + 4008, MI_SEMAPHORE_SAD_NEQ_SDD, 1);

	cs = emit_srm(cs, RING_TIMESTAMP(engine->mmio_base), offset + 4000);
	cs = emit_srm(cs, RING_CTX_TIMESTAMP(engine->mmio_base), offset + 4004);

	/* Busy wait */
	cs = emit_wait(cs, offset + 4008, MI_SEMAPHORE_SAD_EQ_SDD, 1);

	cs = emit_srm(cs, RING_TIMESTAMP(engine->mmio_base), offset + 4016);
	cs = emit_srm(cs, RING_CTX_TIMESTAMP(engine->mmio_base), offset + 4012);

	intel_ring_advance(rq, cs);
	i915_request_get(rq);
	i915_request_add(rq);
	intel_engine_flush_submission(engine);

	/* Wait for the request to start executing, which then waits for us */
	while (READ_ONCE(sema[2]) == 0)
		cpu_relax();

	/* Run the request for 100us, sampling timestamps before/after */
	local_irq_disable();
	write_semaphore(&sema[2], 0);
	while (READ_ONCE(sema[1]) == 0) /* wait for the gpu to catch up */
		cpu_relax();
	*dt = local_clock();
	udelay(100);
	*dt = local_clock() - *dt;
	write_semaphore(&sema[2], 1);
	local_irq_enable();

	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
		i915_request_put(rq);
		return -ETIME;
	}
	i915_request_put(rq);

	pr_debug("%s CTX_TIMESTAMP: [%x, %x], RING_TIMESTAMP: [%x, %x]\n",
		 engine->name, sema[1], sema[3], sema[0], sema[4]);

	*d_ctx = sema[3] - sema[1];
	*d_ring = sema[4] - sema[0];
	return 0;
}


static int __live_engine_timestamps(struct intel_engine_cs *engine)
{
	u64 s_ring[COUNT], s_ctx[COUNT], st[COUNT], d_ring, d_ctx, dt;
	struct intel_context *ce;
	int i, err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	for (i = 0; i < COUNT; i++) {
		err = __measure_timestamps(ce, &st[i], &s_ring[i], &s_ctx[i]);
		if (err)
			break;
	}
	intel_context_put(ce);
	if (err)
		return err;

	dt = trifilter(st);
	d_ring = trifilter(s_ring);
	d_ctx = trifilter(s_ctx);

	pr_info("%s elapsed:%lldns, CTX_TIMESTAMP:%lldns, RING_TIMESTAMP:%lldns\n",
		engine->name, dt,
		intel_gt_clock_interval_to_ns(engine->gt, d_ctx),
		intel_gt_clock_interval_to_ns(engine->gt, d_ring));

	d_ring = intel_gt_clock_interval_to_ns(engine->gt, d_ring);
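	/* Require the GPU clock and walltime to agree within a 3:4 ratio */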
	if (3 * dt > 4 * d_ring || 4 * dt < 3 * d_ring) {
		pr_err("%s Mismatch between ring timestamp and walltime!\n",
		       engine->name);
		return -EINVAL;
	}

	d_ring = trifilter(s_ring);
	d_ctx = trifilter(s_ctx);

	d_ctx *= engine->gt->clock_frequency;
	if (GRAPHICS_VER(engine->i915) == 11)
		d_ring *= 12500000; /* Fixed 80ns for GEN11 ctx timestamp? */
	else
		d_ring *= engine->gt->clock_frequency;

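	/* After scaling to a common rate, the two counters must agree within 3:4 */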
	if (3 * d_ctx > 4 * d_ring || 4 * d_ctx < 3 * d_ring) {
		pr_err("%s Mismatch between ring and context timestamps!\n",
		       engine->name);
		return -EINVAL;
	}

	return 0;
}

static int live_engine_timestamps(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Check that CS_TIMESTAMP / CTX_TIMESTAMP are in sync, i.e. share
	 * the same CS clock.
	 */

	if (GRAPHICS_VER(gt->i915) < 8)
		return 0;

	for_each_engine(engine, gt, id) {
		int err;

		st_engine_heartbeat_disable(engine);
		err = __live_engine_timestamps(engine);
		st_engine_heartbeat_enable(engine);
		if (err)
			return err;
	}

	return 0;
}

static int __spin_until_busier(struct intel_engine_cs *engine, ktime_t busyness)
{
	ktime_t start, unused, dt;

	if (!intel_engine_uses_guc(engine))
		return 0;

	/*
	 * In GuC mode of submission, the busyness stats may get updated after
	 * the batch starts running. Poll for a change in busyness and timeout
	 * after 10 ms.
	 */
	start = ktime_get();
	while (intel_engine_get_busy_time(engine, &unused) == busyness) {
		dt = ktime_get() - start;
		if (dt > 10000000) {
			pr_err("active wait timed out %lld\n", dt);
			ENGINE_TRACE(engine, "active wait timed out %lld\n", dt);
			return -ETIME;
		}
	}

	return 0;
}


static int live_engine_busy_stats(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Check that if an engine supports busy-stats, they tell the truth.
	 */

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		ktime_t busyness, dummy;
		ktime_t de, dt;
		ktime_t t[2];

		if (!intel_engine_supports_stats(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (intel_gt_pm_wait_for_idle(gt)) {
			err = -EBUSY;
			break;
		}

		st_engine_heartbeat_disable(engine);

		ENGINE_TRACE(engine, "measuring idle time\n");
		preempt_disable();
		de = intel_engine_get_busy_time(engine, &t[0]);
		udelay(100);
		de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de);
		preempt_enable();
		dt = ktime_sub(t[1], t[0]);
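		/* While asleep, the engine should report (almost) zero busyness */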
		if (de < 0 || de > 10) {
			pr_err("%s: reported %lldns [%d%%] busyness while sleeping [for %lldns]\n",
			       engine->name,
			       de, (int)div64_u64(100 * de, dt), dt);
			GEM_TRACE_DUMP();
			err = -EINVAL;
			goto end;
		}

		/* 100% busy */
		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto end;
		}
		i915_request_add(rq);

		busyness = intel_engine_get_busy_time(engine, &dummy);
		if (!igt_wait_for_spinner(&spin, rq)) {
			intel_gt_set_wedged(engine->gt);
			err = -ETIME;
			goto end;
		}

		err = __spin_until_busier(engine, busyness);
		if (err) {
			GEM_TRACE_DUMP();
			goto end;
		}

		ENGINE_TRACE(engine, "measuring busy time\n");
		preempt_disable();
		de = intel_engine_get_busy_time(engine, &t[0]);
		mdelay(100);
		de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de);
		preempt_enable();
		dt = ktime_sub(t[1], t[0]);
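		/* While spinning, busyness should match walltime within 5% */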
		if (100 * de < 95 * dt || 95 * de > 100 * dt) {
			pr_err("%s: reported %lldns [%d%%] busyness while spinning [for %lldns]\n",
			       engine->name,
			       de, (int)div64_u64(100 * de, dt), dt);
			GEM_TRACE_DUMP();
			err = -EINVAL;
			goto end;
		}

end:
		st_engine_heartbeat_enable(engine);
		igt_spinner_end(&spin);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	if (igt_flush_test(gt->i915))
		err = -EIO;
	return err;
}


static int live_engine_pm(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Check we can call intel_engine_pm_put from any context. No
	 * failures are reported directly, but if we mess up lockdep should
	 * tell us.
	 */
	if (intel_gt_pm_wait_for_idle(gt)) {
		pr_err("Unable to flush GT pm before test\n");
		return -EBUSY;
	}

	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
	for_each_engine(engine, gt, id) {
		const typeof(*igt_atomic_phases) *p;

		for (p = igt_atomic_phases; p->name; p++) {
			/*
			 * Acquisition is always synchronous, except if we
			 * know that the engine is already awake, in which
			 * case we should use intel_engine_pm_get_if_awake()
			 * to atomically grab the wakeref.
			 *
			 * In practice,
			 *    intel_engine_pm_get();
			 *    intel_engine_pm_put();
			 * occurs in one thread, while simultaneously
			 *    intel_engine_pm_get_if_awake();
			 *    intel_engine_pm_put();
			 * occurs from atomic context in another.
			 */
			GEM_BUG_ON(intel_engine_pm_is_awake(engine));
			intel_engine_pm_get(engine);

			p->critical_section_begin();
			if (!intel_engine_pm_get_if_awake(engine))
				pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n",
				       engine->name, p->name);
			else
				intel_engine_pm_put_async(engine);
			intel_engine_pm_put_async(engine);
			p->critical_section_end();

			intel_engine_pm_flush(engine);

			if (intel_engine_pm_is_awake(engine)) {
				pr_err("%s is still awake after flushing pm\n",
				       engine->name);
				return -EINVAL;
			}

			/* gt wakeref is async (deferred to workqueue) */
			if (intel_gt_pm_wait_for_idle(gt)) {
				gt_err(gt, "GT failed to idle\n");
				return -EINVAL;
			}
		}
	}

	return 0;
}

int live_engine_pm_selftests(struct intel_gt *gt)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_engine_timestamps),
		SUBTEST(live_engine_busy_stats),
		SUBTEST(live_engine_pm),
	};

	return intel_gt_live_subtests(tests, gt);
}