// SPDX-License-Identifier: MIT
/*
 * Copyright © 2018 Intel Corporation
 */
5
6#include <linux/sort.h>
7
8#include "i915_drv.h"
9
10#include "intel_gt_requests.h"
11#include "i915_selftest.h"
12#include "selftest_engine_heartbeat.h"
13
14static void reset_heartbeat(struct intel_engine_cs *engine)
15{
16 intel_engine_set_heartbeat(engine,
17 delay: engine->defaults.heartbeat_interval_ms);
18}
19
20static int timeline_sync(struct intel_timeline *tl)
21{
22 struct dma_fence *fence;
23 long timeout;
24
25 fence = i915_active_fence_get(active: &tl->last_request);
26 if (!fence)
27 return 0;
28
29 timeout = dma_fence_wait_timeout(fence, intr: true, HZ / 2);
30 dma_fence_put(fence);
31 if (timeout < 0)
32 return timeout;
33
34 return 0;
35}
36
37static int engine_sync_barrier(struct intel_engine_cs *engine)
38{
39 return timeline_sync(tl: engine->kernel_context->timeline);
40}
41
42struct pulse {
43 struct i915_active active;
44 struct kref kref;
45};
46
47static int pulse_active(struct i915_active *active)
48{
49 kref_get(kref: &container_of(active, struct pulse, active)->kref);
50 return 0;
51}
52
53static void pulse_free(struct kref *kref)
54{
55 struct pulse *p = container_of(kref, typeof(*p), kref);
56
57 i915_active_fini(ref: &p->active);
58 kfree(objp: p);
59}
60
61static void pulse_put(struct pulse *p)
62{
63 kref_put(kref: &p->kref, release: pulse_free);
64}
65
66static void pulse_retire(struct i915_active *active)
67{
68 pulse_put(container_of(active, struct pulse, active));
69}
70
71static struct pulse *pulse_create(void)
72{
73 struct pulse *p;
74
75 p = kmalloc(size: sizeof(*p), GFP_KERNEL);
76 if (!p)
77 return p;
78
79 kref_init(kref: &p->kref);
80 i915_active_init(&p->active, pulse_active, pulse_retire, 0);
81
82 return p;
83}
84
85static void pulse_unlock_wait(struct pulse *p)
86{
87 wait_var_event_timeout(&p->active, i915_active_is_idle(&p->active), HZ);
88}
89
90static int __live_idle_pulse(struct intel_engine_cs *engine,
91 int (*fn)(struct intel_engine_cs *cs))
92{
93 struct pulse *p;
94 int err;
95
96 GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
97
98 p = pulse_create();
99 if (!p)
100 return -ENOMEM;
101
102 err = i915_active_acquire(ref: &p->active);
103 if (err)
104 goto out;
105
106 err = i915_active_acquire_preallocate_barrier(ref: &p->active, engine);
107 if (err) {
108 i915_active_release(ref: &p->active);
109 goto out;
110 }
111
112 i915_active_acquire_barrier(ref: &p->active);
113 i915_active_release(ref: &p->active);
114
115 GEM_BUG_ON(i915_active_is_idle(&p->active));
116 GEM_BUG_ON(llist_empty(&engine->barrier_tasks));
117
118 err = fn(engine);
119 if (err)
120 goto out;
121
122 GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
123
124 if (engine_sync_barrier(engine)) {
125 struct drm_printer m = drm_err_printer(drm: &engine->i915->drm, prefix: "pulse");
126
127 drm_printf(p: &m, f: "%s: no heartbeat pulse?\n", engine->name);
128 intel_engine_dump(engine, m: &m, header: "%s", engine->name);
129
130 err = -ETIME;
131 goto out;
132 }
133
134 GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);
135
136 pulse_unlock_wait(p); /* synchronize with the retirement callback */
137
138 if (!i915_active_is_idle(ref: &p->active)) {
139 struct drm_printer m = drm_err_printer(drm: &engine->i915->drm, prefix: "pulse");
140
141 drm_printf(p: &m, f: "%s: heartbeat pulse did not flush idle tasks\n",
142 engine->name);
143 i915_active_print(ref: &p->active, m: &m);
144
145 err = -EINVAL;
146 goto out;
147 }
148
149out:
150 pulse_put(p);
151 return err;
152}
153
154static int live_idle_flush(void *arg)
155{
156 struct intel_gt *gt = arg;
157 struct intel_engine_cs *engine;
158 enum intel_engine_id id;
159 int err = 0;
160
161 /* Check that we can flush the idle barriers */
162
163 for_each_engine(engine, gt, id) {
164 st_engine_heartbeat_disable(engine);
165 err = __live_idle_pulse(engine, fn: intel_engine_flush_barriers);
166 st_engine_heartbeat_enable(engine);
167 if (err)
168 break;
169 }
170
171 return err;
172}
173
174static int live_idle_pulse(void *arg)
175{
176 struct intel_gt *gt = arg;
177 struct intel_engine_cs *engine;
178 enum intel_engine_id id;
179 int err = 0;
180
181 /* Check that heartbeat pulses flush the idle barriers */
182
183 for_each_engine(engine, gt, id) {
184 st_engine_heartbeat_disable(engine);
185 err = __live_idle_pulse(engine, fn: intel_engine_pulse);
186 st_engine_heartbeat_enable(engine);
187 if (err && err != -ENODEV)
188 break;
189
190 err = 0;
191 }
192
193 return err;
194}
195
/*
 * sort() comparator for u32 values, ascending.
 *
 * Returns a negative value, zero or a positive value when *_a is less than,
 * equal to or greater than *_b. An explicit three-way comparison is used
 * because the unsigned difference "*a - *b", once converted to int, has the
 * wrong sign whenever the two values are more than INT_MAX apart.
 */
static int cmp_u32(const void *_a, const void *_b)
{
	const u32 *a = _a, *b = _b;

	return (*a > *b) - (*a < *b);
}
202
203static int __live_heartbeat_fast(struct intel_engine_cs *engine)
204{
205 const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6));
206 struct intel_context *ce;
207 struct i915_request *rq;
208 ktime_t t0, t1;
209 u32 times[5];
210 int err;
211 int i;
212
213 ce = intel_context_create(engine);
214 if (IS_ERR(ptr: ce))
215 return PTR_ERR(ptr: ce);
216
217 intel_engine_pm_get(engine);
218
219 err = intel_engine_set_heartbeat(engine, delay: 1);
220 if (err)
221 goto err_pm;
222
223 for (i = 0; i < ARRAY_SIZE(times); i++) {
224 do {
225 /* Manufacture a tick */
226 intel_engine_park_heartbeat(engine);
227 GEM_BUG_ON(engine->heartbeat.systole);
228 engine->serial++; /* pretend we are not idle! */
229 intel_engine_unpark_heartbeat(engine);
230
231 flush_delayed_work(dwork: &engine->heartbeat.work);
232 if (!delayed_work_pending(&engine->heartbeat.work)) {
233 pr_err("%s: heartbeat %d did not start\n",
234 engine->name, i);
235 err = -EINVAL;
236 goto err_pm;
237 }
238
239 rcu_read_lock();
240 rq = READ_ONCE(engine->heartbeat.systole);
241 if (rq)
242 rq = i915_request_get_rcu(rq);
243 rcu_read_unlock();
244 } while (!rq);
245
246 t0 = ktime_get();
247 while (rq == READ_ONCE(engine->heartbeat.systole))
248 yield(); /* work is on the local cpu! */
249 t1 = ktime_get();
250
251 i915_request_put(rq);
252 times[i] = ktime_us_delta(later: t1, earlier: t0);
253 }
254
255 sort(base: times, ARRAY_SIZE(times), size: sizeof(times[0]), cmp_func: cmp_u32, NULL);
256
257 pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
258 engine->name,
259 times[ARRAY_SIZE(times) / 2],
260 times[0],
261 times[ARRAY_SIZE(times) - 1]);
262
263 /*
264 * Ideally, the upper bound on min work delay would be something like
265 * 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we
266 * are, even with system_wq_highpri, at the mercy of the CPU scheduler
267 * and may be stuck behind some slow work for many millisecond. Such
268 * as our very own display workers.
269 */
270 if (times[ARRAY_SIZE(times) / 2] > error_threshold) {
271 pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
272 engine->name,
273 times[ARRAY_SIZE(times) / 2],
274 error_threshold);
275 err = -EINVAL;
276 }
277
278 reset_heartbeat(engine);
279err_pm:
280 intel_engine_pm_put(engine);
281 intel_context_put(ce);
282 return err;
283}
284
285static int live_heartbeat_fast(void *arg)
286{
287 struct intel_gt *gt = arg;
288 struct intel_engine_cs *engine;
289 enum intel_engine_id id;
290 int err = 0;
291
292 /* Check that the heartbeat ticks at the desired rate. */
293 if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
294 return 0;
295
296 for_each_engine(engine, gt, id) {
297 err = __live_heartbeat_fast(engine);
298 if (err)
299 break;
300 }
301
302 return err;
303}
304
305static int __live_heartbeat_off(struct intel_engine_cs *engine)
306{
307 int err;
308
309 intel_engine_pm_get(engine);
310
311 engine->serial++;
312 flush_delayed_work(dwork: &engine->heartbeat.work);
313 if (!delayed_work_pending(&engine->heartbeat.work)) {
314 pr_err("%s: heartbeat not running\n",
315 engine->name);
316 err = -EINVAL;
317 goto err_pm;
318 }
319
320 err = intel_engine_set_heartbeat(engine, delay: 0);
321 if (err)
322 goto err_pm;
323
324 engine->serial++;
325 flush_delayed_work(dwork: &engine->heartbeat.work);
326 if (delayed_work_pending(&engine->heartbeat.work)) {
327 pr_err("%s: heartbeat still running\n",
328 engine->name);
329 err = -EINVAL;
330 goto err_beat;
331 }
332
333 if (READ_ONCE(engine->heartbeat.systole)) {
334 pr_err("%s: heartbeat still allocated\n",
335 engine->name);
336 err = -EINVAL;
337 goto err_beat;
338 }
339
340err_beat:
341 reset_heartbeat(engine);
342err_pm:
343 intel_engine_pm_put(engine);
344 return err;
345}
346
347static int live_heartbeat_off(void *arg)
348{
349 struct intel_gt *gt = arg;
350 struct intel_engine_cs *engine;
351 enum intel_engine_id id;
352 int err = 0;
353
354 /* Check that we can turn off heartbeat and not interrupt VIP */
355 if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
356 return 0;
357
358 for_each_engine(engine, gt, id) {
359 if (!intel_engine_has_preemption(engine))
360 continue;
361
362 err = __live_heartbeat_off(engine);
363 if (err)
364 break;
365 }
366
367 return err;
368}
369
370int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
371{
372 static const struct i915_subtest tests[] = {
373 SUBTEST(live_idle_flush),
374 SUBTEST(live_idle_pulse),
375 SUBTEST(live_heartbeat_fast),
376 SUBTEST(live_heartbeat_off),
377 };
378 int saved_hangcheck;
379 int err;
380
381 if (intel_gt_is_wedged(gt: to_gt(i915)))
382 return 0;
383
384 saved_hangcheck = i915->params.enable_hangcheck;
385 i915->params.enable_hangcheck = INT_MAX;
386
387 err = intel_gt_live_subtests(tests, to_gt(i915));
388
389 i915->params.enable_hangcheck = saved_hangcheck;
390 return err;
391}
392
393void st_engine_heartbeat_disable(struct intel_engine_cs *engine)
394{
395 engine->props.heartbeat_interval_ms = 0;
396
397 intel_engine_pm_get(engine);
398 intel_engine_park_heartbeat(engine);
399}
400
401void st_engine_heartbeat_enable(struct intel_engine_cs *engine)
402{
403 intel_engine_pm_put(engine);
404
405 engine->props.heartbeat_interval_ms =
406 engine->defaults.heartbeat_interval_ms;
407}
408
409void st_engine_heartbeat_disable_no_pm(struct intel_engine_cs *engine)
410{
411 engine->props.heartbeat_interval_ms = 0;
412
413 /*
414 * Park the heartbeat but without holding the PM lock as that
415 * makes the engines appear not-idle. Note that if/when unpark
416 * is called due to the PM lock being acquired later the
417 * heartbeat still won't be enabled because of the above = 0.
418 */
419 if (intel_engine_pm_get_if_awake(engine)) {
420 intel_engine_park_heartbeat(engine);
421 intel_engine_pm_put(engine);
422 }
423}
424
425void st_engine_heartbeat_enable_no_pm(struct intel_engine_cs *engine)
426{
427 engine->props.heartbeat_interval_ms =
428 engine->defaults.heartbeat_interval_ms;
429}
430

/* Source: linux/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c */