1 | // SPDX-License-Identifier: MIT |
2 | /* |
3 | * Copyright © 2018 Intel Corporation |
4 | */ |
5 | |
6 | #include <linux/sort.h> |
7 | |
8 | #include "i915_drv.h" |
9 | |
10 | #include "intel_gt_requests.h" |
11 | #include "i915_selftest.h" |
12 | #include "selftest_engine_heartbeat.h" |
13 | |
14 | static void reset_heartbeat(struct intel_engine_cs *engine) |
15 | { |
16 | intel_engine_set_heartbeat(engine, |
17 | delay: engine->defaults.heartbeat_interval_ms); |
18 | } |
19 | |
20 | static int timeline_sync(struct intel_timeline *tl) |
21 | { |
22 | struct dma_fence *fence; |
23 | long timeout; |
24 | |
25 | fence = i915_active_fence_get(active: &tl->last_request); |
26 | if (!fence) |
27 | return 0; |
28 | |
29 | timeout = dma_fence_wait_timeout(fence, intr: true, HZ / 2); |
30 | dma_fence_put(fence); |
31 | if (timeout < 0) |
32 | return timeout; |
33 | |
34 | return 0; |
35 | } |
36 | |
37 | static int engine_sync_barrier(struct intel_engine_cs *engine) |
38 | { |
39 | return timeline_sync(tl: engine->kernel_context->timeline); |
40 | } |
41 | |
42 | struct pulse { |
43 | struct i915_active active; |
44 | struct kref kref; |
45 | }; |
46 | |
47 | static int pulse_active(struct i915_active *active) |
48 | { |
49 | kref_get(kref: &container_of(active, struct pulse, active)->kref); |
50 | return 0; |
51 | } |
52 | |
53 | static void pulse_free(struct kref *kref) |
54 | { |
55 | struct pulse *p = container_of(kref, typeof(*p), kref); |
56 | |
57 | i915_active_fini(ref: &p->active); |
58 | kfree(objp: p); |
59 | } |
60 | |
61 | static void pulse_put(struct pulse *p) |
62 | { |
63 | kref_put(kref: &p->kref, release: pulse_free); |
64 | } |
65 | |
66 | static void pulse_retire(struct i915_active *active) |
67 | { |
68 | pulse_put(container_of(active, struct pulse, active)); |
69 | } |
70 | |
71 | static struct pulse *pulse_create(void) |
72 | { |
73 | struct pulse *p; |
74 | |
75 | p = kmalloc(size: sizeof(*p), GFP_KERNEL); |
76 | if (!p) |
77 | return p; |
78 | |
79 | kref_init(kref: &p->kref); |
80 | i915_active_init(&p->active, pulse_active, pulse_retire, 0); |
81 | |
82 | return p; |
83 | } |
84 | |
85 | static void pulse_unlock_wait(struct pulse *p) |
86 | { |
87 | wait_var_event_timeout(&p->active, i915_active_is_idle(&p->active), HZ); |
88 | } |
89 | |
90 | static int __live_idle_pulse(struct intel_engine_cs *engine, |
91 | int (*fn)(struct intel_engine_cs *cs)) |
92 | { |
93 | struct pulse *p; |
94 | int err; |
95 | |
96 | GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); |
97 | |
98 | p = pulse_create(); |
99 | if (!p) |
100 | return -ENOMEM; |
101 | |
102 | err = i915_active_acquire(ref: &p->active); |
103 | if (err) |
104 | goto out; |
105 | |
106 | err = i915_active_acquire_preallocate_barrier(ref: &p->active, engine); |
107 | if (err) { |
108 | i915_active_release(ref: &p->active); |
109 | goto out; |
110 | } |
111 | |
112 | i915_active_acquire_barrier(ref: &p->active); |
113 | i915_active_release(ref: &p->active); |
114 | |
115 | GEM_BUG_ON(i915_active_is_idle(&p->active)); |
116 | GEM_BUG_ON(llist_empty(&engine->barrier_tasks)); |
117 | |
118 | err = fn(engine); |
119 | if (err) |
120 | goto out; |
121 | |
122 | GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); |
123 | |
124 | if (engine_sync_barrier(engine)) { |
125 | struct drm_printer m = drm_err_printer(drm: &engine->i915->drm, prefix: "pulse" ); |
126 | |
127 | drm_printf(p: &m, f: "%s: no heartbeat pulse?\n" , engine->name); |
128 | intel_engine_dump(engine, m: &m, header: "%s" , engine->name); |
129 | |
130 | err = -ETIME; |
131 | goto out; |
132 | } |
133 | |
134 | GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial); |
135 | |
136 | pulse_unlock_wait(p); /* synchronize with the retirement callback */ |
137 | |
138 | if (!i915_active_is_idle(ref: &p->active)) { |
139 | struct drm_printer m = drm_err_printer(drm: &engine->i915->drm, prefix: "pulse" ); |
140 | |
141 | drm_printf(p: &m, f: "%s: heartbeat pulse did not flush idle tasks\n" , |
142 | engine->name); |
143 | i915_active_print(ref: &p->active, m: &m); |
144 | |
145 | err = -EINVAL; |
146 | goto out; |
147 | } |
148 | |
149 | out: |
150 | pulse_put(p); |
151 | return err; |
152 | } |
153 | |
154 | static int live_idle_flush(void *arg) |
155 | { |
156 | struct intel_gt *gt = arg; |
157 | struct intel_engine_cs *engine; |
158 | enum intel_engine_id id; |
159 | int err = 0; |
160 | |
161 | /* Check that we can flush the idle barriers */ |
162 | |
163 | for_each_engine(engine, gt, id) { |
164 | st_engine_heartbeat_disable(engine); |
165 | err = __live_idle_pulse(engine, fn: intel_engine_flush_barriers); |
166 | st_engine_heartbeat_enable(engine); |
167 | if (err) |
168 | break; |
169 | } |
170 | |
171 | return err; |
172 | } |
173 | |
174 | static int live_idle_pulse(void *arg) |
175 | { |
176 | struct intel_gt *gt = arg; |
177 | struct intel_engine_cs *engine; |
178 | enum intel_engine_id id; |
179 | int err = 0; |
180 | |
181 | /* Check that heartbeat pulses flush the idle barriers */ |
182 | |
183 | for_each_engine(engine, gt, id) { |
184 | st_engine_heartbeat_disable(engine); |
185 | err = __live_idle_pulse(engine, fn: intel_engine_pulse); |
186 | st_engine_heartbeat_enable(engine); |
187 | if (err && err != -ENODEV) |
188 | break; |
189 | |
190 | err = 0; |
191 | } |
192 | |
193 | return err; |
194 | } |
195 | |
196 | static int cmp_u32(const void *_a, const void *_b) |
197 | { |
198 | const u32 *a = _a, *b = _b; |
199 | |
200 | return *a - *b; |
201 | } |
202 | |
203 | static int __live_heartbeat_fast(struct intel_engine_cs *engine) |
204 | { |
205 | const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6)); |
206 | struct intel_context *ce; |
207 | struct i915_request *rq; |
208 | ktime_t t0, t1; |
209 | u32 times[5]; |
210 | int err; |
211 | int i; |
212 | |
213 | ce = intel_context_create(engine); |
214 | if (IS_ERR(ptr: ce)) |
215 | return PTR_ERR(ptr: ce); |
216 | |
217 | intel_engine_pm_get(engine); |
218 | |
219 | err = intel_engine_set_heartbeat(engine, delay: 1); |
220 | if (err) |
221 | goto err_pm; |
222 | |
223 | for (i = 0; i < ARRAY_SIZE(times); i++) { |
224 | do { |
225 | /* Manufacture a tick */ |
226 | intel_engine_park_heartbeat(engine); |
227 | GEM_BUG_ON(engine->heartbeat.systole); |
228 | engine->serial++; /* pretend we are not idle! */ |
229 | intel_engine_unpark_heartbeat(engine); |
230 | |
231 | flush_delayed_work(dwork: &engine->heartbeat.work); |
232 | if (!delayed_work_pending(&engine->heartbeat.work)) { |
233 | pr_err("%s: heartbeat %d did not start\n" , |
234 | engine->name, i); |
235 | err = -EINVAL; |
236 | goto err_pm; |
237 | } |
238 | |
239 | rcu_read_lock(); |
240 | rq = READ_ONCE(engine->heartbeat.systole); |
241 | if (rq) |
242 | rq = i915_request_get_rcu(rq); |
243 | rcu_read_unlock(); |
244 | } while (!rq); |
245 | |
246 | t0 = ktime_get(); |
247 | while (rq == READ_ONCE(engine->heartbeat.systole)) |
248 | yield(); /* work is on the local cpu! */ |
249 | t1 = ktime_get(); |
250 | |
251 | i915_request_put(rq); |
252 | times[i] = ktime_us_delta(later: t1, earlier: t0); |
253 | } |
254 | |
255 | sort(base: times, ARRAY_SIZE(times), size: sizeof(times[0]), cmp_func: cmp_u32, NULL); |
256 | |
257 | pr_info("%s: Heartbeat delay: %uus [%u, %u]\n" , |
258 | engine->name, |
259 | times[ARRAY_SIZE(times) / 2], |
260 | times[0], |
261 | times[ARRAY_SIZE(times) - 1]); |
262 | |
263 | /* |
264 | * Ideally, the upper bound on min work delay would be something like |
265 | * 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we |
266 | * are, even with system_wq_highpri, at the mercy of the CPU scheduler |
267 | * and may be stuck behind some slow work for many millisecond. Such |
268 | * as our very own display workers. |
269 | */ |
270 | if (times[ARRAY_SIZE(times) / 2] > error_threshold) { |
271 | pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n" , |
272 | engine->name, |
273 | times[ARRAY_SIZE(times) / 2], |
274 | error_threshold); |
275 | err = -EINVAL; |
276 | } |
277 | |
278 | reset_heartbeat(engine); |
279 | err_pm: |
280 | intel_engine_pm_put(engine); |
281 | intel_context_put(ce); |
282 | return err; |
283 | } |
284 | |
285 | static int live_heartbeat_fast(void *arg) |
286 | { |
287 | struct intel_gt *gt = arg; |
288 | struct intel_engine_cs *engine; |
289 | enum intel_engine_id id; |
290 | int err = 0; |
291 | |
292 | /* Check that the heartbeat ticks at the desired rate. */ |
293 | if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) |
294 | return 0; |
295 | |
296 | for_each_engine(engine, gt, id) { |
297 | err = __live_heartbeat_fast(engine); |
298 | if (err) |
299 | break; |
300 | } |
301 | |
302 | return err; |
303 | } |
304 | |
305 | static int __live_heartbeat_off(struct intel_engine_cs *engine) |
306 | { |
307 | int err; |
308 | |
309 | intel_engine_pm_get(engine); |
310 | |
311 | engine->serial++; |
312 | flush_delayed_work(dwork: &engine->heartbeat.work); |
313 | if (!delayed_work_pending(&engine->heartbeat.work)) { |
314 | pr_err("%s: heartbeat not running\n" , |
315 | engine->name); |
316 | err = -EINVAL; |
317 | goto err_pm; |
318 | } |
319 | |
320 | err = intel_engine_set_heartbeat(engine, delay: 0); |
321 | if (err) |
322 | goto err_pm; |
323 | |
324 | engine->serial++; |
325 | flush_delayed_work(dwork: &engine->heartbeat.work); |
326 | if (delayed_work_pending(&engine->heartbeat.work)) { |
327 | pr_err("%s: heartbeat still running\n" , |
328 | engine->name); |
329 | err = -EINVAL; |
330 | goto err_beat; |
331 | } |
332 | |
333 | if (READ_ONCE(engine->heartbeat.systole)) { |
334 | pr_err("%s: heartbeat still allocated\n" , |
335 | engine->name); |
336 | err = -EINVAL; |
337 | goto err_beat; |
338 | } |
339 | |
340 | err_beat: |
341 | reset_heartbeat(engine); |
342 | err_pm: |
343 | intel_engine_pm_put(engine); |
344 | return err; |
345 | } |
346 | |
347 | static int live_heartbeat_off(void *arg) |
348 | { |
349 | struct intel_gt *gt = arg; |
350 | struct intel_engine_cs *engine; |
351 | enum intel_engine_id id; |
352 | int err = 0; |
353 | |
354 | /* Check that we can turn off heartbeat and not interrupt VIP */ |
355 | if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) |
356 | return 0; |
357 | |
358 | for_each_engine(engine, gt, id) { |
359 | if (!intel_engine_has_preemption(engine)) |
360 | continue; |
361 | |
362 | err = __live_heartbeat_off(engine); |
363 | if (err) |
364 | break; |
365 | } |
366 | |
367 | return err; |
368 | } |
369 | |
370 | int intel_heartbeat_live_selftests(struct drm_i915_private *i915) |
371 | { |
372 | static const struct i915_subtest tests[] = { |
373 | SUBTEST(live_idle_flush), |
374 | SUBTEST(live_idle_pulse), |
375 | SUBTEST(live_heartbeat_fast), |
376 | SUBTEST(live_heartbeat_off), |
377 | }; |
378 | int saved_hangcheck; |
379 | int err; |
380 | |
381 | if (intel_gt_is_wedged(gt: to_gt(i915))) |
382 | return 0; |
383 | |
384 | saved_hangcheck = i915->params.enable_hangcheck; |
385 | i915->params.enable_hangcheck = INT_MAX; |
386 | |
387 | err = intel_gt_live_subtests(tests, to_gt(i915)); |
388 | |
389 | i915->params.enable_hangcheck = saved_hangcheck; |
390 | return err; |
391 | } |
392 | |
393 | void st_engine_heartbeat_disable(struct intel_engine_cs *engine) |
394 | { |
395 | engine->props.heartbeat_interval_ms = 0; |
396 | |
397 | intel_engine_pm_get(engine); |
398 | intel_engine_park_heartbeat(engine); |
399 | } |
400 | |
401 | void st_engine_heartbeat_enable(struct intel_engine_cs *engine) |
402 | { |
403 | intel_engine_pm_put(engine); |
404 | |
405 | engine->props.heartbeat_interval_ms = |
406 | engine->defaults.heartbeat_interval_ms; |
407 | } |
408 | |
409 | void st_engine_heartbeat_disable_no_pm(struct intel_engine_cs *engine) |
410 | { |
411 | engine->props.heartbeat_interval_ms = 0; |
412 | |
413 | /* |
414 | * Park the heartbeat but without holding the PM lock as that |
415 | * makes the engines appear not-idle. Note that if/when unpark |
416 | * is called due to the PM lock being acquired later the |
417 | * heartbeat still won't be enabled because of the above = 0. |
418 | */ |
419 | if (intel_engine_pm_get_if_awake(engine)) { |
420 | intel_engine_park_heartbeat(engine); |
421 | intel_engine_pm_put(engine); |
422 | } |
423 | } |
424 | |
425 | void st_engine_heartbeat_enable_no_pm(struct intel_engine_cs *engine) |
426 | { |
427 | engine->props.heartbeat_interval_ms = |
428 | engine->defaults.heartbeat_interval_ms; |
429 | } |
430 | |