// SPDX-License-Identifier: MIT
/*
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/prime_numbers.h>
#include <linux/string_helpers.h>

#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"
#include "selftest_engine_heartbeat.h"

#include "../selftests/i915_random.h"
#include "../i915_selftest.h"

#include "selftests/igt_flush_test.h"
#include "selftests/lib_sw_fence.h"
#include "selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"

static struct page *hwsp_page(struct intel_timeline *tl)
{
	struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

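/*
 * Identify the seqno slot backing this timeline: the kernel address of the
 * HWSP page plus the timeline's offset within it, scaled down to
 * TIMELINE_SEQNO_BYTES granularity. Two timelines sharing a slot would
 * corrupt each other's breadcrumbs, so the mock tests below use this value
 * as a uniqueness key.
 */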
static unsigned long hwsp_cacheline(struct intel_timeline *tl)
{
	unsigned long address = (unsigned long)page_address(hwsp_page(tl));

	return (address + offset_in_page(tl->hwsp_offset)) / TIMELINE_SEQNO_BYTES;
}

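/*
 * Pin the timeline's HWSP using the ww (wait/wound) locking dance: take the
 * object lock, attempt the pin, and on -EDEADLK back off and retry until the
 * lock is acquired without risk of deadlock.
 */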
static int selftest_tl_pin(struct intel_timeline *tl)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_gem_object_lock(tl->hwsp_ggtt->obj, &ww);
	if (!err)
		err = intel_timeline_pin(tl, &ww);

	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}

/* Only half of seqnos are usable, see __intel_timeline_get_seqno() */
#define CACHELINES_PER_PAGE (PAGE_SIZE / TIMELINE_SEQNO_BYTES / 2)

struct mock_hwsp_freelist {
	struct intel_gt *gt;
	struct radix_tree_root cachelines;
	struct intel_timeline **history;
	unsigned long count, max;
	struct rnd_state prng;
};

enum {
	SHUFFLE = BIT(0),
};

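/*
 * Record a timeline in the fixed-size history ring. Whatever previously
 * occupied the slot is evicted: its cacheline key is removed from the radix
 * tree and its pin and reference are dropped. Passing tl == NULL therefore
 * simply releases the slot.
 */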
static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
			       unsigned int idx,
			       struct intel_timeline *tl)
{
	tl = xchg(&state->history[idx], tl);
	if (tl) {
		radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
		intel_timeline_unpin(tl);
		intel_timeline_put(tl);
	}
}

static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
				unsigned int count,
				unsigned int flags)
{
	struct intel_timeline *tl;
	unsigned int idx;

	while (count--) {
		unsigned long cacheline;
		int err;

		tl = intel_timeline_create(state->gt);
		if (IS_ERR(tl))
			return PTR_ERR(tl);

		err = selftest_tl_pin(tl);
		if (err) {
			intel_timeline_put(tl);
			return err;
		}

		cacheline = hwsp_cacheline(tl);
		err = radix_tree_insert(&state->cachelines, cacheline, tl);
		if (err) {
			if (err == -EEXIST) {
				pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
				       cacheline);
			}
			intel_timeline_unpin(tl);
			intel_timeline_put(tl);
			return err;
		}

		idx = state->count++ % state->max;
		__mock_hwsp_record(state, idx, tl);
	}

	if (flags & SHUFFLE)
		i915_prandom_shuffle(state->history,
				     sizeof(*state->history),
				     min(state->count, state->max),
				     &state->prng);

	count = i915_prandom_u32_max_state(min(state->count, state->max),
					   &state->prng);
	while (count--) {
		idx = --state->count % state->max;
		__mock_hwsp_record(state, idx, NULL);
	}

	return 0;
}

static int mock_hwsp_freelist(void *arg)
{
	struct mock_hwsp_freelist state;
	struct drm_i915_private *i915;
	const struct {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "linear", 0 },
		{ "shuffled", SHUFFLE },
		{ },
	}, *p;
	unsigned int na;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);

	state.gt = to_gt(i915);

	/*
	 * Create a bunch of timelines and check that their HWSPs do not
	 * overlap. Free some, and try again.
	 */

	state.max = PAGE_SIZE / sizeof(*state.history);
	state.count = 0;
	state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
	if (!state.history) {
		err = -ENOMEM;
		goto err_put;
	}

	for (p = phases; p->name; p++) {
		pr_debug("%s(%s)\n", __func__, p->name);
		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
			err = __mock_hwsp_timeline(&state, na, p->flags);
			if (err)
				goto out;
		}
	}

out:
	for (na = 0; na < state.max; na++)
		__mock_hwsp_record(&state, na, NULL);
	kfree(state.history);
err_put:
	mock_destroy_device(i915);
	return err;
}

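/*
 * Table-driven check of the timeline sync map: each entry asks whether
 * @seqno is already signaled for a context ("is_later") and compares the
 * answer against the expected result, optionally recording the seqno
 * afterwards so that later entries observe it.
 */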
struct __igt_sync {
	const char *name;
	u32 seqno;
	bool expected;
	bool set;
};

static int __igt_sync(struct intel_timeline *tl,
		      u64 ctx,
		      const struct __igt_sync *p,
		      const char *name)
{
	int ret;

	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
		       name, p->name, ctx, p->seqno, str_yes_no(p->expected));
		return -EINVAL;
	}

	if (p->set) {
		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
		if (ret)
			return ret;
	}

	return 0;
}

static int igt_sync(void *arg)
{
	const struct __igt_sync pass[] = {
		{ "unset", 0, false, false },
		{ "new", 0, false, true },
		{ "0a", 0, true, true },
		{ "1a", 1, false, true },
		{ "1b", 1, true, true },
		{ "0b", 0, true, false },
		{ "2a", 2, false, true },
		{ "4", 4, false, true },
		{ "INT_MAX", INT_MAX, false, true },
		{ "INT_MAX-1", INT_MAX-1, true, false },
		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
		{ "INT_MAX", INT_MAX, true, false },
		{ "UINT_MAX", UINT_MAX, false, true },
		{ "wrap", 0, false, true },
		{ "unwrap", UINT_MAX, true, false },
		{},
	}, *p;
	struct intel_timeline tl;
	int order, offset;
	int ret = -ENODEV;

	mock_timeline_init(&tl, 0);
	for (p = pass; p->name; p++) {
		for (order = 1; order < 64; order++) {
			for (offset = -1; offset <= (order > 1); offset++) {
				u64 ctx = BIT_ULL(order) + offset;

				ret = __igt_sync(&tl, ctx, p, "1");
				if (ret)
					goto out;
			}
		}
	}
	mock_timeline_fini(&tl);

	mock_timeline_init(&tl, 0);
	for (order = 1; order < 64; order++) {
		for (offset = -1; offset <= (order > 1); offset++) {
			u64 ctx = BIT_ULL(order) + offset;

			for (p = pass; p->name; p++) {
				ret = __igt_sync(&tl, ctx, p, "2");
				if (ret)
					goto out;
			}
		}
	}

out:
	mock_timeline_fini(&tl);
	return ret;
}

static unsigned int random_engine(struct rnd_state *rnd)
{
	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

static int bench_sync(void *arg)
{
	struct rnd_state prng;
	struct intel_timeline tl;
	unsigned long end_time, count;
	u64 prng32_1M;
	ktime_t kt;
	int order, last_order;

	mock_timeline_init(&tl, 0);

	/* Lookups from the cache are very fast, so the random number
	 * generation and the loop itself become a significant factor in the
	 * per-iteration timings. We compensate by measuring the overhead of
	 * the prng and subtracting it from the reported results.
	 */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 x;

		/* Make sure the compiler doesn't optimise away the prng call */
		WRITE_ONCE(x, prandom_u32_state(&prng));

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);
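	/*
	 * prng32_1M is the measured cost of one prandom_u32_state() call in
	 * nanoseconds, scaled up by 2^20 to keep precision in integer math.
	 * Each benchmark below that consumes prng output subtracts
	 * (count * prng32_1M * ncalls) >> 20, i.e. the prng overhead, from
	 * its elapsed time before reporting the per-operation cost; a u64
	 * draw costs two u32 calls, hence the factor of 2.
	 */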

	/* Benchmark (only) setting random context ids */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u64 id = i915_prandom_u64_state(&prng);

		__intel_timeline_sync_set(&tl, id, 0);
		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		u64 id = i915_prandom_u64_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
			mock_timeline_fini(&tl);
			pr_err("Lookup of %llu failed\n", id);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark setting the first N (in order) contexts */
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		__intel_timeline_sync_set(&tl, count++, 0);
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
			pr_err("Lookup of %lu failed\n", end_time);
			mock_timeline_fini(&tl);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark searching for a random context id and maybe changing it */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 id = random_engine(&prng);
		u32 seqno = prandom_u32_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, seqno))
			__intel_timeline_sync_set(&tl, id, seqno);

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	mock_timeline_fini(&tl);
	cond_resched();

	/* Benchmark searching for a known context id and changing the seqno */
	for (last_order = 1, order = 1; order < 32;
	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
		unsigned int mask = BIT(order) - 1;

		mock_timeline_init(&tl, 0);

		count = 0;
		kt = ktime_get();
		end_time = jiffies + HZ/10;
		do {
			/* Without assuming too many details of the underlying
			 * implementation, try to identify its phase-changes
			 * (if any)!
			 */
			u64 id = (u64)(count & mask) << order;

			__intel_timeline_sync_is_later(&tl, id, 0);
			__intel_timeline_sync_set(&tl, id, 0);

			count++;
		} while (!time_after(jiffies, end_time));
		kt = ktime_sub(ktime_get(), kt);
		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
			__func__, count, order,
			(long long)div64_ul(ktime_to_ns(kt), count));
		mock_timeline_fini(&tl);
		cond_resched();
	}

	return 0;
}

int intel_timeline_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(mock_hwsp_freelist),
		SUBTEST(igt_sync),
		SUBTEST(bench_sync),
	};

	return i915_subtests(tests, NULL);
}

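/*
 * Emit an MI_STORE_DWORD of @value to a GGTT address, using the command
 * layout for the running hardware generation: gen8+ takes a 64-bit address
 * (low dword, then high), gen4+ a 32-bit address preceded by a padding
 * dword, and earlier gens the legacy MI_STORE_DWORD_IMM encoding padded
 * with MI_NOOP so that the emission is always four dwords.
 */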
static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (GRAPHICS_VER(rq->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = addr;
		*cs++ = 0;
		*cs++ = value;
	} else if (GRAPHICS_VER(rq->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = addr;
		*cs++ = value;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = addr;
		*cs++ = value;
		*cs++ = MI_NOOP;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

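/*
 * Verify the timeline's breadcrumb matches its bookkeeping, then submit a
 * kernel request that stores @value into the timeline's HWSP slot from the
 * GPU. The timeline is unpinned again before returning; the request comes
 * back already added, with a reference the caller must i915_request_put().
 */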
static struct i915_request *
checked_tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
	struct i915_request *rq;
	int err;

	err = selftest_tl_pin(tl);
	if (err) {
		rq = ERR_PTR(err);
		goto out;
	}

	if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) {
		pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
		       *tl->hwsp_seqno, tl->seqno);
		intel_timeline_unpin(tl);
		return ERR_PTR(-EINVAL);
	}

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		goto out_unpin;

	i915_request_get(rq);

	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(err);
	}

out_unpin:
	intel_timeline_unpin(tl);
out:
	if (IS_ERR(rq))
		pr_err("Failed to write to timeline!\n");
	return rq;
}

static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		for (n = 0; n < NUM_TIMELINES; n++) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = checked_tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
			GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
				      n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
			GEM_TRACE_DUMP();
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots with adjacent
	 * engines.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for (n = 0; n < NUM_TIMELINES; n++) {
		for_each_engine(engine, gt, id) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			if (!intel_engine_can_store_dword(engine))
				continue;

			tl = intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				goto out;
			}

			intel_engine_pm_get(engine);
			rq = checked_tl_write(tl, engine, count);
			intel_engine_pm_put(engine);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				goto out;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
			GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
				      n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
			GEM_TRACE_DUMP();
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_wrap(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Across a seqno wrap, we need to keep the old cacheline alive for
	 * foreign GPU references.
	 */

	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->has_initial_breadcrumb)
		goto out_free;

	err = selftest_tl_pin(tl);
	if (err)
		goto out_free;

	for_each_engine(engine, gt, id) {
		const u32 *hwsp_seqno[2];
		struct i915_request *rq;
		u32 seqno[2];

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}

		tl->seqno = -4u;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
			 seqno[0], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[0] = tl->hwsp_seqno;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
			 seqno[1], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[1] = tl->hwsp_seqno;

		/* With wrap should come a new hwsp */
		GEM_BUG_ON(seqno[1] >= seqno[0]);
		GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);

		i915_request_add(rq);

		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("Wait for timeline writes timed out!\n");
			err = -EIO;
			goto out;
		}

		if (READ_ONCE(*hwsp_seqno[0]) != seqno[0] ||
		    READ_ONCE(*hwsp_seqno[1]) != seqno[1]) {
			pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
			       *hwsp_seqno[0], *hwsp_seqno[1],
			       seqno[0], seqno[1]);
			err = -EINVAL;
			goto out;
		}

		intel_gt_retire_requests(gt); /* recycle HWSP */
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	intel_timeline_unpin(tl);
out_free:
	intel_timeline_put(tl);
	return err;
}

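/*
 * Emit a GPU-side read of a HWSP: record the expected seqno at *addr, then
 * LRM the live HWSP value into a scratch GPR and SRM it back out to the
 * next dword. check_watcher() later walks these seqno/hwsp pairs to verify
 * the value the GPU observed against the value it was expecting.
 */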
static int emit_read_hwsp(struct i915_request *rq,
			  u32 seqno, u32 hwsp,
			  u32 *addr)
{
	const u32 gpr = i915_mmio_reg_offset(GEN8_RING_CS_GPR(rq->engine->mmio_base, 0));
	u32 *cs;

	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = *addr;
	*cs++ = 0;
	*cs++ = seqno;
	*addr += 4;

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = gpr;
	*cs++ = hwsp;
	*cs++ = 0;

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = gpr;
	*cs++ = *addr;
	*cs++ = 0;
	*addr += 4;

	intel_ring_advance(rq, cs);

	return 0;
}

struct hwsp_watcher {
	struct i915_vma *vma;
	struct i915_request *rq;
	u32 addr;
	u32 *map;
};

static bool cmp_lt(u32 a, u32 b)
{
	return a < b;
}

static bool cmp_gte(u32 a, u32 b)
{
	return a >= b;
}

static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt,
			 struct intel_timeline *tl)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(gt->i915, SZ_2M);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	/* keep the same cache settings as timeline */
	i915_gem_object_set_pat_index(obj, tl->hwsp_ggtt->obj->pat_index);
	w->map = i915_gem_object_pin_map_unlocked(obj,
						  page_unmask_bits(tl->hwsp_ggtt->obj->mm.mapping));
	if (IS_ERR(w->map)) {
		i915_gem_object_put(obj);
		return PTR_ERR(w->map);
	}

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return PTR_ERR(vma);
	}

	w->vma = vma;
	w->addr = i915_ggtt_offset(vma);
	return 0;
}

static void switch_tl_lock(struct i915_request *from, struct i915_request *to)
{
	/* some light mutex juggling required; think co-routines */

	if (from) {
		lockdep_unpin_lock(&from->context->timeline->mutex, from->cookie);
		mutex_unlock(&from->context->timeline->mutex);
	}

	if (to) {
		mutex_lock(&to->context->timeline->mutex);
		to->cookie = lockdep_pin_lock(&to->context->timeline->mutex);
	}
}

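/*
 * Prepare a long-lived "watcher" request on its own context, with a ring
 * sized to batch many HWSP reads. The request is created but not yet
 * submitted; its timeline mutex is dropped (see switch_tl_lock()) so that
 * other requests can be built while commands are still being appended to
 * the watcher.
 */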
static int create_watcher(struct hwsp_watcher *w,
			  struct intel_engine_cs *engine,
			  int ringsz)
{
	struct intel_context *ce;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	ce->ring_size = ringsz;
	w->rq = intel_context_create_request(ce);
	intel_context_put(ce);
	if (IS_ERR(w->rq))
		return PTR_ERR(w->rq);

	w->addr = i915_ggtt_offset(w->vma);

	switch_tl_lock(w->rq, NULL);

	return 0;
}

static int check_watcher(struct hwsp_watcher *w, const char *name,
			 bool (*op)(u32 hwsp, u32 seqno))
{
	struct i915_request *rq = fetch_and_zero(&w->rq);
	u32 offset, end;
	int err;

	GEM_BUG_ON(w->addr - i915_ggtt_offset(w->vma) > w->vma->size);

	i915_request_get(rq);
	switch_tl_lock(NULL, rq);
	i915_request_add(rq);

	if (i915_request_wait(rq, 0, HZ) < 0) {
		err = -ETIME;
		goto out;
	}

	err = 0;
	offset = 0;
	end = (w->addr - i915_ggtt_offset(w->vma)) / sizeof(*w->map);
	while (offset < end) {
		if (!op(w->map[offset + 1], w->map[offset])) {
			pr_err("Watcher '%s' found HWSP value %x for seqno %x\n",
			       name, w->map[offset + 1], w->map[offset]);
			err = -EINVAL;
		}

		offset += 2;
	}

out:
	i915_request_put(rq);
	return err;
}

static void cleanup_watcher(struct hwsp_watcher *w)
{
	if (w->rq) {
		switch_tl_lock(NULL, w->rq);

		i915_request_add(w->rq);
	}

	i915_vma_unpin_and_release(&w->vma, I915_VMA_RELEASE_MAP);
}

static bool retire_requests(struct intel_timeline *tl)
{
	struct i915_request *rq, *rn;

	mutex_lock(&tl->mutex);
	list_for_each_entry_safe(rq, rn, &tl->requests, link)
		if (!i915_request_retire(rq))
			break;
	mutex_unlock(&tl->mutex);

	return !i915_active_fence_isset(&tl->last_request);
}

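/*
 * Submit filler requests on the context until its timeline seqno wraps past
 * the given request's seqno, then return a fresh request (with a reference
 * held) from the far side of the wrap. The reference on the incoming
 * request is consumed.
 */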
static struct i915_request *wrap_timeline(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;
	struct intel_timeline *tl = ce->timeline;
	u32 seqno = rq->fence.seqno;

	while (tl->seqno >= seqno) { /* Cause a wrap */
		i915_request_put(rq);
		rq = intel_context_create_request(ce);
		if (IS_ERR(rq))
			return rq;

		i915_request_get(rq);
		i915_request_add(rq);
	}

	i915_request_put(rq);
	rq = i915_request_create(ce);
	if (IS_ERR(rq))
		return rq;

	i915_request_get(rq);
	i915_request_add(rq);

	return rq;
}

static int live_hwsp_read(void *arg)
{
	struct intel_gt *gt = arg;
	struct hwsp_watcher watcher[2] = {};
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;
	int i;

	/*
	 * If we take a reference to the HWSP for reading on the GPU, that
	 * read may be arbitrarily delayed (either by foreign fence or
	 * priority saturation) and a wrap can happen within 30 minutes.
	 * When the GPU read is finally submitted it should be correct,
	 * even across multiple wraps.
	 */

	if (GRAPHICS_VER(gt->i915) < 8) /* CS convenience [SRM/LRM] */
		return 0;

	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->has_initial_breadcrumb)
		goto out_free;

	selftest_tl_pin(tl);

	for (i = 0; i < ARRAY_SIZE(watcher); i++) {
		err = setup_watcher(&watcher[i], gt, tl);
		if (err)
			goto out;
	}

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		unsigned long count = 0;
		IGT_TIMEOUT(end_time);

		/* Create a request we can use for remote reading of the HWSP */
		err = create_watcher(&watcher[1], engine, SZ_512K);
		if (err)
			goto out;

		do {
			struct i915_sw_fence *submit;
			struct i915_request *rq;
			u32 hwsp, dummy;

			submit = heap_fence_create(GFP_KERNEL);
			if (!submit) {
				err = -ENOMEM;
				goto out;
			}

			err = create_watcher(&watcher[0], engine, SZ_4K);
			if (err)
				goto out;

			ce = intel_context_create(engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			ce->timeline = intel_timeline_get(tl);

			/* Ensure timeline is mapped, done during first pin */
			err = intel_context_pin(ce);
			if (err) {
				intel_context_put(ce);
				goto out;
			}

			/*
			 * Start at a new wrap, and set seqno right before another wrap,
			 * saving 30 minutes of nops
			 */
			tl->seqno = -12u + 2 * (count & 3);
			__intel_timeline_get_seqno(tl, &dummy);

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}

			err = i915_sw_fence_await_dma_fence(&rq->submit,
							    &watcher[0].rq->fence, 0,
							    GFP_KERNEL);
			if (err < 0) {
				i915_request_add(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}

			switch_tl_lock(rq, watcher[0].rq);
			err = intel_timeline_read_hwsp(rq, watcher[0].rq, &hwsp);
			if (err == 0)
				err = emit_read_hwsp(watcher[0].rq, /* before */
						     rq->fence.seqno, hwsp,
						     &watcher[0].addr);
			switch_tl_lock(watcher[0].rq, rq);
			if (err) {
				i915_request_add(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}

			switch_tl_lock(rq, watcher[1].rq);
			err = intel_timeline_read_hwsp(rq, watcher[1].rq, &hwsp);
			if (err == 0)
				err = emit_read_hwsp(watcher[1].rq, /* after */
						     rq->fence.seqno, hwsp,
						     &watcher[1].addr);
			switch_tl_lock(watcher[1].rq, rq);
			if (err) {
				i915_request_add(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}

			i915_request_get(rq);
			i915_request_add(rq);

			rq = wrap_timeline(rq);
			intel_context_unpin(ce);
			intel_context_put(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto out;
			}

			err = i915_sw_fence_await_dma_fence(&watcher[1].rq->submit,
							    &rq->fence, 0,
							    GFP_KERNEL);
			if (err < 0) {
				i915_request_put(rq);
				goto out;
			}

			err = check_watcher(&watcher[0], "before", cmp_lt);
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			if (err) {
				i915_request_put(rq);
				goto out;
			}
			count++;

			/* Flush the timeline before manually wrapping again */
			if (i915_request_wait(rq,
					      I915_WAIT_INTERRUPTIBLE,
					      HZ) < 0) {
				err = -ETIME;
				i915_request_put(rq);
				goto out;
			}
			retire_requests(tl);
			i915_request_put(rq);

			/* Single requests are limited to half a ring at most */
			if (8 * watcher[1].rq->ring->emit >
			    3 * watcher[1].rq->ring->size)
				break;

		} while (!__igt_timeout(end_time, NULL) &&
			 count < (PAGE_SIZE / TIMELINE_SEQNO_BYTES - 1) / 2);

		pr_info("%s: simulated %lu wraps\n", engine->name, count);
		err = check_watcher(&watcher[1], "after", cmp_gte);
		if (err)
			goto out;
	}

out:
	for (i = 0; i < ARRAY_SIZE(watcher); i++)
		cleanup_watcher(&watcher[i]);

	intel_timeline_unpin(tl);

	if (igt_flush_test(gt->i915))
		err = -EIO;

out_free:
	intel_timeline_put(tl);
	return err;
}

static int live_hwsp_rollover_kernel(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Run the host for long enough, and even the kernel context will
	 * see a seqno rollover.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct intel_timeline *tl = ce->timeline;
		struct i915_request *rq[3] = {};
		int i;

		st_engine_heartbeat_disable(engine);
		if (intel_gt_wait_for_idle(gt, HZ / 2)) {
			err = -EIO;
			goto out;
		}

		GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
		tl->seqno = -2u;
		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			struct i915_request *this;

			this = i915_request_create(ce);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out;
			}

			pr_debug("%s: create fence.seqno:%d\n",
				 engine->name,
				 lower_32_bits(this->fence.seqno));

			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

			rq[i] = i915_request_get(this);
			i915_request_add(this);
		}

		/* We expected a wrap! */
		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
			pr_err("Wait for timeline wrap timed out!\n");
			err = -EIO;
			goto out;
		}

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			if (!i915_request_completed(rq[i])) {
				pr_err("Pre-wrap request not completed!\n");
				err = -EINVAL;
				goto out;
			}
		}

out:
		for (i = 0; i < ARRAY_SIZE(rq); i++)
			i915_request_put(rq[i]);
		st_engine_heartbeat_enable(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

static int live_hwsp_rollover_user(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Simulate a long running user context, and force the seqno wrap
	 * on the user's timeline.
	 */

	for_each_engine(engine, gt, id) {
		struct i915_request *rq[3] = {};
		struct intel_timeline *tl;
		struct intel_context *ce;
		int i;

		ce = intel_context_create(engine);
		if (IS_ERR(ce))
			return PTR_ERR(ce);

		err = intel_context_alloc_state(ce);
		if (err)
			goto out;

		tl = ce->timeline;
		if (!tl->has_initial_breadcrumb)
			goto out;

		err = intel_context_pin(ce);
		if (err)
			goto out;

		tl->seqno = -4u;
		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			struct i915_request *this;

			this = intel_context_create_request(ce);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out_unpin;
			}

			pr_debug("%s: create fence.seqno:%d\n",
				 engine->name,
				 lower_32_bits(this->fence.seqno));

			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

			rq[i] = i915_request_get(this);
			i915_request_add(this);
		}

		/* We expected a wrap! */
		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
			pr_err("Wait for timeline wrap timed out!\n");
			err = -EIO;
			goto out_unpin;
		}

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			if (!i915_request_completed(rq[i])) {
				pr_err("Pre-wrap request not completed!\n");
				err = -EINVAL;
				goto out_unpin;
			}
		}
out_unpin:
		intel_context_unpin(ce);
out:
		for (i = 0; i < ARRAY_SIZE(rq); i++)
			i915_request_put(rq[i]);
		intel_context_put(ce);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

static int live_hwsp_recycle(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count;
	int err = 0;

	/*
	 * Check seqno writes into one timeline at a time. We expect to
	 * recycle the breadcrumb slot between iterations and neither
	 * want to confuse ourselves or the GPU.
	 */

	count = 0;
	for_each_engine(engine, gt, id) {
		IGT_TIMEOUT(end_time);

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		do {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = checked_tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Wait for timeline writes timed out!\n");
				i915_request_put(rq);
				intel_timeline_put(tl);
				err = -EIO;
				break;
			}

			if (READ_ONCE(*tl->hwsp_seqno) != count) {
				GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x found 0x%x\n",
					      count, tl->fence_context,
					      tl->hwsp_offset, *tl->hwsp_seqno);
				GEM_TRACE_DUMP();
				err = -EINVAL;
			}

			i915_request_put(rq);
			intel_timeline_put(tl);
			count++;

			if (err)
				break;
		} while (!__igt_timeout(end_time, NULL));

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	return err;
}

int intel_timeline_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_hwsp_recycle),
		SUBTEST(live_hwsp_engine),
		SUBTEST(live_hwsp_alternate),
		SUBTEST(live_hwsp_wrap),
		SUBTEST(live_hwsp_read),
		SUBTEST(live_hwsp_rollover_kernel),
		SUBTEST(live_hwsp_rollover_user),
	};

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	return intel_gt_live_subtests(tests, to_gt(i915));
}