/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prime_numbers.h>
#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_pm.h"
#include "gem/selftests/mock_context.h"

#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_requests.h"
#include "gt/selftest_engine_heartbeat.h"

#include "i915_random.h"
#include "i915_selftest.h"
#include "igt_flush_test.h"
#include "igt_live_test.h"
#include "igt_spinner.h"
#include "lib_sw_fence.h"

#include "mock_drm.h"
#include "mock_gem_device.h"
static unsigned int num_uabi_engines(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	unsigned int count;

	count = 0;
	for_each_uabi_engine(engine, i915)
		count++;

	return count;
}

static struct intel_engine_cs *rcs0(struct drm_i915_private *i915)
{
	return intel_engine_lookup_user(i915, I915_ENGINE_CLASS_RENDER, 0);
}

static int igt_add_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;

	/* Basic preliminary test to create a request and let it loose! */

	request = mock_request(rcs0(i915)->kernel_context, HZ / 10);
	if (!request)
		return -ENOMEM;

	i915_request_add(request);

	return 0;
}

static int igt_wait_request(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, then wait upon it */
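	/*
	 * As exercised below: i915_request_wait() with a zero timeout acts
	 * as a busy query (-ETIME while the request is outstanding), a
	 * finite timeout returns -ETIME on expiry, and a successful wait
	 * returns the remaining jiffies (>= 0).
	 */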

	request = mock_request(rcs0(i915)->kernel_context, T);
	if (!request)
		return -ENOMEM;

	i915_request_get(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) != -ETIME) {
		pr_err("request wait succeeded (expected timeout before submit!)\n");
		goto out_request;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed before submit!!\n");
		goto out_request;
	}

	i915_request_add(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
		goto out_request;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed immediately!\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T / 2) != -ETIME) {
		pr_err("request wait succeeded (expected timeout!)\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out!\n");
		goto out_request;
	}

	if (!i915_request_completed(request)) {
		pr_err("request not complete after waiting!\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out when already complete!\n");
		goto out_request;
	}

	err = 0;
out_request:
	i915_request_put(request);
	mock_device_flush(i915);
	return err;
}

static int igt_fence_wait(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, treat it as a fence and wait upon it */
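	/*
	 * Note the expected return values below: i915 installs its own
	 * dma_fence ops, and a timed-out wait here reports -ETIME rather
	 * than the generic dma_fence_wait_timeout() convention of
	 * returning 0.
	 */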

	request = mock_request(rcs0(i915)->kernel_context, T);
	if (!request)
		return -ENOMEM;

	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
		pr_err("fence wait success before submit (expected timeout)!\n");
		goto out;
	}

	i915_request_add(request);

	if (dma_fence_is_signaled(&request->fence)) {
		pr_err("fence signaled immediately!\n");
		goto out;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
		pr_err("fence wait success after submit (expected timeout)!\n");
		goto out;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out (expected success)!\n");
		goto out;
	}

	if (!dma_fence_is_signaled(&request->fence)) {
		pr_err("fence unsignaled after waiting!\n");
		goto out;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out when complete (expected success)!\n");
		goto out;
	}

	err = 0;
out:
	mock_device_flush(i915);
	return err;
}

static int igt_request_rewind(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request, *vip;
	struct i915_gem_context *ctx[2];
	struct intel_context *ce;
	int err = -EINVAL;

	ctx[0] = mock_context(i915, "A");
	if (!ctx[0]) {
		err = -ENOMEM;
		goto err_ctx_0;
	}

	ce = i915_gem_context_get_engine(ctx[0], RCS0);
	GEM_BUG_ON(IS_ERR(ce));
	request = mock_request(ce, 2 * HZ);
	intel_context_put(ce);
	if (!request) {
		err = -ENOMEM;
		goto err_context_0;
	}

	i915_request_get(request);
	i915_request_add(request);

	ctx[1] = mock_context(i915, "B");
	if (!ctx[1]) {
		err = -ENOMEM;
		goto err_ctx_1;
	}

	ce = i915_gem_context_get_engine(ctx[1], RCS0);
	GEM_BUG_ON(IS_ERR(ce));
	vip = mock_request(ce, 0);
	intel_context_put(ce);
	if (!vip) {
		err = -ENOMEM;
		goto err_context_1;
	}

	/* Simulate preemption by manual reordering */
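	/*
	 * There is no real preemption on the mock engine, so emulate it:
	 * pull the not-yet-executed request back off the queue with
	 * mock_cancel_request(), submit the vip request first, and then
	 * resubmit the original directly via engine->submit_request().
	 */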
	if (!mock_cancel_request(request)) {
		pr_err("failed to cancel request (already executed)!\n");
		i915_request_add(vip);
		goto err_context_1;
	}
	i915_request_get(vip);
	i915_request_add(vip);
	rcu_read_lock();
	request->engine->submit_request(request);
	rcu_read_unlock();

	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
		pr_err("timed out waiting for high priority request\n");
		goto err;
	}

	if (i915_request_completed(request)) {
		pr_err("low priority request already completed\n");
		goto err;
	}

	err = 0;
err:
	i915_request_put(vip);
err_context_1:
	mock_context_close(ctx[1]);
err_ctx_1:
	i915_request_put(request);
err_context_0:
	mock_context_close(ctx[0]);
err_ctx_0:
	mock_device_flush(i915);
	return err;
}

struct smoketest {
	struct intel_engine_cs *engine;
	struct i915_gem_context **contexts;
	atomic_long_t num_waits, num_fences;
	int ncontexts, max_batch;
	struct i915_request *(*request_alloc)(struct intel_context *ce);
};

static struct i915_request *
__mock_request_alloc(struct intel_context *ce)
{
	return mock_request(ce, 0);
}

static struct i915_request *
__live_request_alloc(struct intel_context *ce)
{
	return intel_context_create_request(ce);
}

struct smoke_thread {
	struct kthread_worker *worker;
	struct kthread_work work;
	struct smoketest *t;
	bool stop;
	int result;
};

static void __igt_breadcrumbs_smoketest(struct kthread_work *work)
{
	struct smoke_thread *thread = container_of(work, typeof(*thread), work);
	struct smoketest *t = thread->t;
	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
	const unsigned int total = 4 * t->ncontexts + 1;
	unsigned int num_waits = 0, num_fences = 0;
	struct i915_request **requests;
	I915_RND_STATE(prng);
	unsigned int *order;
	int err = 0;

	/*
	 * A very simple test to catch the most egregious of list handling bugs.
	 *
	 * At its heart, we simply create oodles of requests running across
	 * multiple kthreads and enable signaling on them, for the sole purpose
	 * of stressing our breadcrumb handling. The only inspection we do is
	 * that the fences were marked as signaled.
	 */
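	/*
	 * Each loop iteration below follows the same shape (a rough sketch):
	 *
	 *	submit = heap_fence_create(GFP_KERNEL);	// gates execution
	 *	wait = heap_fence_create(GFP_KERNEL);	// tracks completion
	 *	for (n = 0; n < count; n++)		// random batch size
	 *		queue a request behind 'submit', chain it into 'wait';
	 *	commit both fences, then block until 'wait' is done;
	 *	finally verify every request was marked as signaled.
	 */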

	requests = kcalloc(total, sizeof(*requests), GFP_KERNEL);
	if (!requests) {
		thread->result = -ENOMEM;
		return;
	}

	order = i915_random_order(total, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out_requests;
	}

	while (!READ_ONCE(thread->stop)) {
		struct i915_sw_fence *submit, *wait;
		unsigned int n, count;

		submit = heap_fence_create(GFP_KERNEL);
		if (!submit) {
			err = -ENOMEM;
			break;
		}

		wait = heap_fence_create(GFP_KERNEL);
		if (!wait) {
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			err = -ENOMEM;
			break;
		}

		i915_random_reorder(order, total, &prng);
		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);

		for (n = 0; n < count; n++) {
			struct i915_gem_context *ctx =
				t->contexts[order[n] % t->ncontexts];
			struct i915_request *rq;
			struct intel_context *ce;

			ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));
			rq = t->request_alloc(ce);
			intel_context_put(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				count = n;
				break;
			}

			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
							       submit,
							       GFP_KERNEL);

			requests[n] = i915_request_get(rq);
			i915_request_add(rq);

			if (err >= 0)
				err = i915_sw_fence_await_dma_fence(wait,
								    &rq->fence,
								    0,
								    GFP_KERNEL);

			if (err < 0) {
				i915_request_put(rq);
				count = n;
				break;
			}
		}

		i915_sw_fence_commit(submit);
		i915_sw_fence_commit(wait);

		if (!wait_event_timeout(wait->wait,
					i915_sw_fence_done(wait),
					5 * HZ)) {
			struct i915_request *rq = requests[count - 1];

			pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
			       atomic_read(&wait->pending), count,
			       rq->fence.context, rq->fence.seqno,
			       t->engine->name);
			GEM_TRACE_DUMP();

			intel_gt_set_wedged(t->engine->gt);
			GEM_BUG_ON(!i915_request_completed(rq));
			i915_sw_fence_wait(wait);
			err = -EIO;
		}

		for (n = 0; n < count; n++) {
			struct i915_request *rq = requests[n];

			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				      &rq->fence.flags)) {
				pr_err("%llu:%llu was not signaled!\n",
				       rq->fence.context, rq->fence.seqno);
				err = -EINVAL;
			}

			i915_request_put(rq);
		}

		heap_fence_put(wait);
		heap_fence_put(submit);

		if (err < 0)
			break;

		num_fences += count;
		num_waits++;

		cond_resched();
	}

	atomic_long_add(num_fences, &t->num_fences);
	atomic_long_add(num_waits, &t->num_waits);

	kfree(order);
out_requests:
	kfree(requests);
	thread->result = err;
}

static int mock_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t = {
		.engine = rcs0(i915),
		.ncontexts = 1024,
		.max_batch = 1024,
		.request_alloc = __mock_request_alloc
	};
	unsigned int ncpus = num_online_cpus();
	struct smoke_thread *threads;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 */

	threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
	if (!t.contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	for (n = 0; n < t.ncontexts; n++) {
		t.contexts[n] = mock_context(t.engine->i915, "mock");
		if (!t.contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	for (n = 0; n < ncpus; n++) {
		struct kthread_worker *worker;

		worker = kthread_create_worker(0, "igt/%d", n);
		if (IS_ERR(worker)) {
			ret = PTR_ERR(worker);
			ncpus = n;
			break;
		}

		threads[n].worker = worker;
		threads[n].t = &t;
		threads[n].stop = false;
		threads[n].result = 0;

		kthread_init_work(&threads[n].work,
				  __igt_breadcrumbs_smoketest);
		kthread_queue_work(worker, &threads[n].work);
	}

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

	for (n = 0; n < ncpus; n++) {
		int err;

		WRITE_ONCE(threads[n].stop, true);
		kthread_flush_work(&threads[n].work);
		err = READ_ONCE(threads[n].result);
		if (err < 0 && !ret)
			ret = err;

		kthread_destroy_worker(threads[n].worker);
	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
		atomic_long_read(&t.num_waits),
		atomic_long_read(&t.num_fences),
		ncpus);

out_contexts:
	for (n = 0; n < t.ncontexts; n++) {
		if (!t.contexts[n])
			break;
		mock_context_close(t.contexts[n]);
	}
	kfree(t.contexts);
out_threads:
	kfree(threads);
	return ret;
}

int i915_request_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_add_request),
		SUBTEST(igt_wait_request),
		SUBTEST(igt_fence_wait),
		SUBTEST(igt_request_rewind),
		SUBTEST(mock_breadcrumbs_smoketest),
	};
	struct drm_i915_private *i915;
	intel_wakeref_t wakeref;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		err = i915_subtests(tests, i915);

	mock_destroy_device(i915);

	return err;
}

static int live_nop_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	int err = -ENODEV;

	/*
	 * Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */
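	/*
	 * The measurement loop below times how long it takes to submit and
	 * retire 'prime' no-op requests in a row: times[0] holds the single
	 * request latency and times[1] / prime gives the amortised cost.
	 * For example (illustrative numbers only), if one request takes
	 * 20000ns but 8191 requests take 16.4ms in total, the amortised
	 * cost is ~2000ns, showing how much overhead is batched away.
	 */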

	for_each_uabi_engine(engine, i915) {
		unsigned long n, prime;
		IGT_TIMEOUT(end_time);
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			return err;

		intel_engine_pm_get(engine);
		for_each_prime_number_from(prime, 1, 8192) {
			struct i915_request *request = NULL;

			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				i915_request_put(request);
				request = i915_request_create(engine->kernel_context);
				if (IS_ERR(request))
					return PTR_ERR(request);

				/*
				 * This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				i915_request_get(request);
				i915_request_add(request);
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
			i915_request_put(request);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}
		intel_engine_pm_put(engine);

		err = igt_live_test_end(&t);
		if (err)
			return err;

		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

	return err;
}

static int __cancel_inactive(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct igt_spinner spin;
	struct i915_request *rq;
	int err = 0;

	if (igt_spinner_init(&spin, engine->gt))
		return -ENOMEM;

	ce = intel_context_create(engine);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out_spin;
	}

	rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_ce;
	}

	pr_debug("%s: Cancelling inactive request\n", engine->name);
	i915_request_cancel(rq, -EINTR);
	i915_request_get(rq);
	i915_request_add(rq);

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		struct drm_printer p = drm_info_printer(engine->i915->drm.dev);

		pr_err("%s: Failed to cancel inactive request\n", engine->name);
		intel_engine_dump(engine, &p, "%s\n", engine->name);
		err = -ETIME;
		goto out_rq;
	}

	if (rq->fence.error != -EINTR) {
		pr_err("%s: fence not cancelled (%u)\n",
		       engine->name, rq->fence.error);
		err = -EINVAL;
	}

out_rq:
	i915_request_put(rq);
out_ce:
	intel_context_put(ce);
out_spin:
	igt_spinner_fini(&spin);
	if (err)
		pr_err("%s: %s error %d\n", __func__, engine->name, err);
	return err;
}

static int __cancel_active(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct igt_spinner spin;
	struct i915_request *rq;
	int err = 0;

	if (igt_spinner_init(&spin, engine->gt))
		return -ENOMEM;

	ce = intel_context_create(engine);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out_spin;
	}

	rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_ce;
	}

	pr_debug("%s: Cancelling active request\n", engine->name);
	i915_request_get(rq);
	i915_request_add(rq);
	if (!igt_wait_for_spinner(&spin, rq)) {
		struct drm_printer p = drm_info_printer(engine->i915->drm.dev);

		pr_err("Failed to start spinner on %s\n", engine->name);
		intel_engine_dump(engine, &p, "%s\n", engine->name);
		err = -ETIME;
		goto out_rq;
	}
	i915_request_cancel(rq, -EINTR);

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		struct drm_printer p = drm_info_printer(engine->i915->drm.dev);

		pr_err("%s: Failed to cancel active request\n", engine->name);
		intel_engine_dump(engine, &p, "%s\n", engine->name);
		err = -ETIME;
		goto out_rq;
	}

	if (rq->fence.error != -EINTR) {
		pr_err("%s: fence not cancelled (%u)\n",
		       engine->name, rq->fence.error);
		err = -EINVAL;
	}

out_rq:
	i915_request_put(rq);
out_ce:
	intel_context_put(ce);
out_spin:
	igt_spinner_fini(&spin);
	if (err)
		pr_err("%s: %s error %d\n", __func__, engine->name, err);
	return err;
}

static int __cancel_completed(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct igt_spinner spin;
	struct i915_request *rq;
	int err = 0;

	if (igt_spinner_init(&spin, engine->gt))
		return -ENOMEM;

	ce = intel_context_create(engine);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out_spin;
	}

	rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_ce;
	}
	igt_spinner_end(&spin);
	i915_request_get(rq);
	i915_request_add(rq);

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto out_rq;
	}

	pr_debug("%s: Cancelling completed request\n", engine->name);
	i915_request_cancel(rq, -EINTR);
	if (rq->fence.error) {
		pr_err("%s: cancellation of completed request set an error (%u)\n",
		       engine->name, rq->fence.error);
		err = -EINVAL;
	}

out_rq:
	i915_request_put(rq);
out_ce:
	intel_context_put(ce);
out_spin:
	igt_spinner_fini(&spin);
	if (err)
		pr_err("%s: %s error %d\n", __func__, engine->name, err);
	return err;
}

/*
 * Test to prove a non-preemptible request can be cancelled and a subsequent
 * request on the same context can successfully complete after cancellation.
 *
 * Testing methodology is to create a non-preemptible request and submit it,
 * wait for the spinner to start, create a NOP request and submit it, cancel
 * the spinner, wait for the spinner to complete and verify it failed with an
 * error, then finally wait for the NOP request to complete and verify it
 * succeeded without an error. The preemption timeout is also reduced and
 * restored so that the test runs in a timely manner.
 */
static int __cancel_reset(struct drm_i915_private *i915,
			  struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct igt_spinner spin;
	struct i915_request *rq, *nop;
	unsigned long preempt_timeout_ms;
	int err = 0;

	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT ||
	    !intel_has_reset_engine(engine->gt))
		return 0;

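	/*
	 * Drop the preemption timeout to 100ms (restored at out_restore) so
	 * that the stuck, non-preemptible spinner cancelled below is
	 * declared hung, and the engine reset, almost immediately rather
	 * than after the default timeout.
	 */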
	preempt_timeout_ms = engine->props.preempt_timeout_ms;
	engine->props.preempt_timeout_ms = 100;

	if (igt_spinner_init(&spin, engine->gt)) {
		err = -ENOMEM;
		goto out_restore;
	}

	ce = intel_context_create(engine);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out_spin;
	}

	rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_ce;
	}

	pr_debug("%s: Cancelling active non-preemptible request\n",
		 engine->name);
	i915_request_get(rq);
	i915_request_add(rq);
	if (!igt_wait_for_spinner(&spin, rq)) {
		struct drm_printer p = drm_info_printer(engine->i915->drm.dev);

		pr_err("Failed to start spinner on %s\n", engine->name);
		intel_engine_dump(engine, &p, "%s\n", engine->name);
		err = -ETIME;
		goto out_rq;
	}
	nop = intel_context_create_request(ce);
	if (IS_ERR(nop)) {
		err = PTR_ERR(nop);
		goto out_rq;
	}
	i915_request_get(nop);
	i915_request_add(nop);

	i915_request_cancel(rq, -EINTR);

	if (i915_request_wait(rq, 0, HZ) < 0) {
		struct drm_printer p = drm_info_printer(engine->i915->drm.dev);

		pr_err("%s: Failed to cancel hung request\n", engine->name);
		intel_engine_dump(engine, &p, "%s\n", engine->name);
		err = -ETIME;
		goto out_nop;
	}

	if (rq->fence.error != -EINTR) {
		pr_err("%s: fence not cancelled (%u)\n",
		       engine->name, rq->fence.error);
		err = -EINVAL;
		goto out_nop;
	}

	if (i915_request_wait(nop, 0, HZ) < 0) {
		struct drm_printer p = drm_info_printer(engine->i915->drm.dev);

		pr_err("%s: Failed to complete nop request\n", engine->name);
		intel_engine_dump(engine, &p, "%s\n", engine->name);
		err = -ETIME;
		goto out_nop;
	}

	if (nop->fence.error != 0) {
		pr_err("%s: Nop request errored (%u)\n",
		       engine->name, nop->fence.error);
		err = -EINVAL;
	}

out_nop:
	i915_request_put(nop);
out_rq:
	i915_request_put(rq);
out_ce:
	intel_context_put(ce);
out_spin:
	igt_spinner_fini(&spin);
out_restore:
	engine->props.preempt_timeout_ms = preempt_timeout_ms;
	if (err)
		pr_err("%s: %s error %d\n", __func__, engine->name, err);
	return err;
}

static int live_cancel_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;

	/*
	 * Check cancellation of requests. We expect to be able to immediately
	 * cancel active requests, even if they are currently on the GPU.
	 */
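	/*
	 * Four scenarios are covered per engine: a request cancelled before
	 * it is submitted (__cancel_inactive), one cancelled while spinning
	 * on the GPU (__cancel_active), one cancelled after it has already
	 * completed (__cancel_completed), and a non-preemptible request
	 * whose cancellation requires an engine reset (__cancel_reset).
	 */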

	for_each_uabi_engine(engine, i915) {
		struct igt_live_test t;
		int err, err2;

		if (!intel_engine_has_preemption(engine))
			continue;

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			return err;

		err = __cancel_inactive(engine);
		if (err == 0)
			err = __cancel_active(engine);
		if (err == 0)
			err = __cancel_completed(engine);

		err2 = igt_live_test_end(&t);
		if (err)
			return err;
		if (err2)
			return err2;

		/* Expects reset so call outside of igt_live_test_* */
		err = __cancel_reset(i915, engine);
		if (err)
			return err;

		if (igt_flush_test(i915))
			return -EIO;
	}

	return 0;
}

static struct i915_vma *empty_batch(struct intel_gt *gt)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(gt);

	vma = i915_vma_instance(obj, gt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	/* Force the wait now to avoid including it in the benchmark */
	err = i915_vma_sync(vma);
	if (err)
		goto err_pin;

	return vma;

err_pin:
	i915_vma_unpin(vma);
err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int emit_bb_start(struct i915_request *rq, struct i915_vma *batch)
{
	return rq->engine->emit_bb_start(rq,
					 i915_vma_offset(batch),
					 i915_vma_size(batch),
					 0);
}

static struct i915_request *
empty_request(struct intel_engine_cs *engine,
	      struct i915_vma *batch)
{
	struct i915_request *request;
	int err;

	request = i915_request_create(engine->kernel_context);
	if (IS_ERR(request))
		return request;

	err = emit_bb_start(request, batch);
	if (err)
		goto out_request;

	i915_request_get(request);
out_request:
	i915_request_add(request);
	return err ? ERR_PTR(err) : request;
}

static int live_empty_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	int err;

	/*
	 * Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. Unlike
	 * live_nop_request(), each request here executes a real (if
	 * trivial) batch buffer, so this measures the overhead of batch
	 * submission rather than of the bare request.
	 */

	for_each_uabi_engine(engine, i915) {
		IGT_TIMEOUT(end_time);
		struct i915_request *request;
		struct i915_vma *batch;
		unsigned long n, prime;
		ktime_t times[2] = {};

		batch = empty_batch(engine->gt);
		if (IS_ERR(batch))
			return PTR_ERR(batch);

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_batch;

		intel_engine_pm_get(engine);

		/* Warmup / preload */
		request = empty_request(engine, batch);
		if (IS_ERR(request)) {
			err = PTR_ERR(request);
			intel_engine_pm_put(engine);
			goto out_batch;
		}
		i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				i915_request_put(request);
				request = empty_request(engine, batch);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					intel_engine_pm_put(engine);
					goto out_batch;
				}
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}
		i915_request_put(request);
		intel_engine_pm_put(engine);

		err = igt_live_test_end(&t);
		if (err)
			goto out_batch;

		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
out_batch:
		i915_vma_unpin(batch);
		i915_vma_put(batch);
		if (err)
			break;
	}

	return err;
}

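/*
 * Build a batch buffer whose only payload is a MI_BATCH_BUFFER_START
 * pointing back at itself, so the batch spins in place until
 * recursive_batch_resolve() rewrites its first dword to
 * MI_BATCH_BUFFER_END. The MI_BATCH_BUFFER_START encoding (address width
 * and flag bits) differs per generation, hence the GRAPHICS_VER() switch
 * below.
 */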
static struct i915_vma *recursive_batch(struct intel_gt *gt)
{
	struct drm_i915_gem_object *obj;
	const int ver = GRAPHICS_VER(gt->i915);
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, gt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	if (ver >= 8) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*cmd++ = lower_32_bits(i915_vma_offset(vma));
		*cmd++ = upper_32_bits(i915_vma_offset(vma));
	} else if (ver >= 6) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
		*cmd++ = lower_32_bits(i915_vma_offset(vma));
	} else {
		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
		*cmd++ = lower_32_bits(i915_vma_offset(vma));
	}
	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(gt);

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int recursive_batch_resolve(struct i915_vma *batch)
{
	u32 *cmd;

	cmd = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(batch->obj, 0, sizeof(*cmd));
	i915_gem_object_unpin_map(batch->obj);

	intel_gt_chipset_flush(batch->vm->gt);

	return 0;
}

static int live_all_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	const unsigned int nengines = num_uabi_engines(i915);
	struct intel_engine_cs *engine;
	struct i915_request **request;
	struct igt_live_test t;
	unsigned int idx;
	int err;

	/*
	 * Check we can submit requests to all engines simultaneously. We
	 * send a recursive batch to each engine - checking that we don't
	 * block doing so, and that they don't complete too soon.
	 */

	request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
	if (!request)
		return -ENOMEM;

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_free;

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		struct i915_vma *batch;

		batch = recursive_batch(engine->gt);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			pr_err("%s: Unable to create batch, err=%d\n",
			       __func__, err);
			goto out_free;
		}

		i915_vma_lock(batch);
		request[idx] = intel_engine_create_kernel_request(engine);
		if (IS_ERR(request[idx])) {
			err = PTR_ERR(request[idx]);
			pr_err("%s: Request allocation failed with err=%d\n",
			       __func__, err);
			goto out_unlock;
		}
		GEM_BUG_ON(request[idx]->context->vm != batch->vm);

		err = i915_vma_move_to_active(batch, request[idx], 0);
		GEM_BUG_ON(err);

		err = emit_bb_start(request[idx], batch);
		GEM_BUG_ON(err);
		request[idx]->batch = batch;

		i915_request_get(request[idx]);
		i915_request_add(request[idx]);
		idx++;
out_unlock:
		i915_vma_unlock(batch);
		if (err)
			goto out_request;
	}

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		if (i915_request_completed(request[idx])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}
		idx++;
	}

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		err = recursive_batch_resolve(request[idx]->batch);
		if (err) {
			pr_err("%s: failed to resolve batch, err=%d\n",
			       __func__, err);
			goto out_request;
		}
		idx++;
	}

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		struct i915_request *rq = request[idx];
		long timeout;

		timeout = i915_request_wait(rq, 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(rq));
		i915_vma_unpin(rq->batch);
		i915_vma_put(rq->batch);
		i915_request_put(rq);
		request[idx] = NULL;
		idx++;
	}

	err = igt_live_test_end(&t);

out_request:
	idx = 0;
	for_each_uabi_engine(engine, i915) {
		struct i915_request *rq = request[idx];

		if (!rq) {
			idx++;
			continue;
		}

		if (rq->batch) {
			i915_vma_unpin(rq->batch);
			i915_vma_put(rq->batch);
		}
		i915_request_put(rq);
		idx++;
	}
out_free:
	kfree(request);
	return err;
}

static int live_sequential_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	const unsigned int nengines = num_uabi_engines(i915);
	struct i915_request **request;
	struct i915_request *prev = NULL;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	unsigned int idx;
	int err;

	/*
	 * Check we can submit requests to all engines sequentially, such
	 * that each successive request waits for the earlier ones. This
	 * tests that we don't execute requests out of order, even though
	 * they are running on independent engines.
	 */

	request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
	if (!request)
		return -ENOMEM;

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_free;

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		struct i915_vma *batch;

		batch = recursive_batch(engine->gt);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			pr_err("%s: Unable to create batch for %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_free;
		}

		i915_vma_lock(batch);
		request[idx] = intel_engine_create_kernel_request(engine);
		if (IS_ERR(request[idx])) {
			err = PTR_ERR(request[idx]);
			pr_err("%s: Request allocation failed for %s with err=%d\n",
			       __func__, engine->name, err);
			goto out_unlock;
		}
		GEM_BUG_ON(request[idx]->context->vm != batch->vm);

		if (prev) {
			err = i915_request_await_dma_fence(request[idx],
							   &prev->fence);
			if (err) {
				i915_request_add(request[idx]);
				pr_err("%s: Request await failed for %s with err=%d\n",
				       __func__, engine->name, err);
				goto out_unlock;
			}
		}

		err = i915_vma_move_to_active(batch, request[idx], 0);
		GEM_BUG_ON(err);

		err = emit_bb_start(request[idx], batch);
		GEM_BUG_ON(err);
		request[idx]->batch = batch;

		i915_request_get(request[idx]);
		i915_request_add(request[idx]);

		prev = request[idx];
		idx++;

out_unlock:
		i915_vma_unlock(batch);
		if (err)
			goto out_request;
	}

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		long timeout;

		if (i915_request_completed(request[idx])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}

		err = recursive_batch_resolve(request[idx]->batch);
		if (err) {
			pr_err("%s: failed to resolve batch, err=%d\n",
			       __func__, err);
			goto out_request;
		}

		timeout = i915_request_wait(request[idx], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[idx]));
		idx++;
	}

	err = igt_live_test_end(&t);

out_request:
	idx = 0;
	for_each_uabi_engine(engine, i915) {
		u32 *cmd;

		if (!request[idx])
			break;

		cmd = i915_gem_object_pin_map_unlocked(request[idx]->batch->obj,
						       I915_MAP_WC);
		if (!IS_ERR(cmd)) {
			*cmd = MI_BATCH_BUFFER_END;

			__i915_gem_object_flush_map(request[idx]->batch->obj,
						    0, sizeof(*cmd));
			i915_gem_object_unpin_map(request[idx]->batch->obj);

			intel_gt_chipset_flush(engine->gt);
		}

		i915_vma_put(request[idx]->batch);
		i915_request_put(request[idx]);
		idx++;
	}
out_free:
	kfree(request);
	return err;
}

struct parallel_thread {
	struct kthread_worker *worker;
	struct kthread_work work;
	struct intel_engine_cs *engine;
	int result;
};

static void __live_parallel_engine1(struct kthread_work *work)
{
	struct parallel_thread *thread =
		container_of(work, typeof(*thread), work);
	struct intel_engine_cs *engine = thread->engine;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int err = 0;

	count = 0;
	intel_engine_pm_get(engine);
	do {
		struct i915_request *rq;

		rq = i915_request_create(engine->kernel_context);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		err = 0;
		if (i915_request_wait(rq, 0, HZ) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			break;

		count++;
	} while (!__igt_timeout(end_time, NULL));
	intel_engine_pm_put(engine);

	pr_info("%s: %lu request + sync\n", engine->name, count);
	thread->result = err;
}

static void __live_parallel_engineN(struct kthread_work *work)
{
	struct parallel_thread *thread =
		container_of(work, typeof(*thread), work);
	struct intel_engine_cs *engine = thread->engine;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int err = 0;

	count = 0;
	intel_engine_pm_get(engine);
	do {
		struct i915_request *rq;

		rq = i915_request_create(engine->kernel_context);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);
		count++;
	} while (!__igt_timeout(end_time, NULL));
	intel_engine_pm_put(engine);

	pr_info("%s: %lu requests\n", engine->name, count);
	thread->result = err;
}

static bool wake_all(struct drm_i915_private *i915)
{
	if (atomic_dec_and_test(&i915->selftest.counter)) {
		wake_up_var(&i915->selftest.counter);
		return true;
	}

	return false;
}

static int wait_for_all(struct drm_i915_private *i915)
{
	if (wake_all(i915))
		return 0;

	if (wait_var_event_timeout(&i915->selftest.counter,
				   !atomic_read(&i915->selftest.counter),
				   i915_selftest.timeout_jiffies))
		return 0;

	return -ETIME;
}
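
/*
 * wake_all()/wait_for_all() form a simple countdown barrier over
 * i915->selftest.counter. A rough usage sketch (mirroring
 * __live_parallel_spin() and live_parallel_engines() below):
 *
 *	atomic_set(&i915->selftest.counter, nengines);
 *	// then, in each of the nengines threads:
 *	err = wait_for_all(i915); // decrement; the last one wakes the rest
 */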

static void __live_parallel_spin(struct kthread_work *work)
{
	struct parallel_thread *thread =
		container_of(work, typeof(*thread), work);
	struct intel_engine_cs *engine = thread->engine;
	struct igt_spinner spin;
	struct i915_request *rq;
	int err = 0;

	/*
	 * Create a spinner running for eternity on each engine. If a second
	 * spinner is incorrectly placed on the same engine, it will not be
	 * able to start in time.
	 */

	if (igt_spinner_init(&spin, engine->gt)) {
		wake_all(engine->i915);
		thread->result = -ENOMEM;
		return;
	}

	intel_engine_pm_get(engine);
	rq = igt_spinner_create_request(&spin,
					engine->kernel_context,
					MI_NOOP); /* no preemption */
	intel_engine_pm_put(engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		if (err == -ENODEV)
			err = 0;
		wake_all(engine->i915);
		goto out_spin;
	}

	i915_request_get(rq);
	i915_request_add(rq);
	if (igt_wait_for_spinner(&spin, rq)) {
		/* Occupy this engine for the whole test */
		err = wait_for_all(engine->i915);
	} else {
		pr_err("Failed to start spinner on %s\n", engine->name);
		err = -EINVAL;
	}
	igt_spinner_end(&spin);

	if (err == 0 && i915_request_wait(rq, 0, HZ) < 0)
		err = -EIO;
	i915_request_put(rq);

out_spin:
	igt_spinner_fini(&spin);
	thread->result = err;
}

static int live_parallel_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static void (* const func[])(struct kthread_work *) = {
		__live_parallel_engine1,
		__live_parallel_engineN,
		__live_parallel_spin,
		NULL,
	};
	const unsigned int nengines = num_uabi_engines(i915);
	struct parallel_thread *threads;
	struct intel_engine_cs *engine;
	void (* const *fn)(struct kthread_work *);
	int err = 0;

	/*
	 * Check we can submit requests to all engines concurrently. This
	 * tests that we load up the system maximally.
	 */

	threads = kcalloc(nengines, sizeof(*threads), GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	for (fn = func; !err && *fn; fn++) {
		char name[KSYM_NAME_LEN];
		struct igt_live_test t;
		unsigned int idx;

		snprintf(name, sizeof(name), "%ps", *fn);
		err = igt_live_test_begin(&t, i915, __func__, name);
		if (err)
			break;

		atomic_set(&i915->selftest.counter, nengines);

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			struct kthread_worker *worker;

			worker = kthread_create_worker(0, "igt/parallel:%s",
						       engine->name);
			if (IS_ERR(worker)) {
				err = PTR_ERR(worker);
				break;
			}

			threads[idx].worker = worker;
			threads[idx].result = 0;
			threads[idx].engine = engine;

			kthread_init_work(&threads[idx].work, *fn);
			kthread_queue_work(worker, &threads[idx].work);
			idx++;
		}

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			int status;

			if (!threads[idx].worker)
				break;

			kthread_flush_work(&threads[idx].work);
			status = READ_ONCE(threads[idx].result);
			if (status && !err)
				err = status;

			kthread_destroy_worker(threads[idx++].worker);
		}

		if (igt_live_test_end(&t))
			err = -EIO;
	}

	kfree(threads);
	return err;
}

static int
max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	int ret;

	/*
	 * Before execlists, all contexts share the same ringbuffer. With
	 * execlists, each context/engine has a separate ringbuffer and
	 * for the purposes of this test, inexhaustible.
	 *
	 * For the global ringbuffer though, we have to be very careful
	 * that we do not wrap while preventing the execution of requests
	 * with an unsignaled fence.
	 */
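	/*
	 * Worked example (illustrative numbers only): with a 16KiB legacy
	 * ring, ~64 bytes of reserved space and requests emitting ~256
	 * bytes each, the computation below yields roughly
	 * (16384 - 64) / 256 = 63 requests, halved to ~31 for headroom.
	 */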
	if (HAS_EXECLISTS(ctx->i915))
		return INT_MAX;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
	} else {
		int sz;

		ret = rq->ring->size - rq->reserved_space;
		i915_request_add(rq);

		sz = rq->ring->emit - rq->head;
		if (sz < 0)
			sz += rq->ring->size;
		ret /= sz;
		ret /= 2; /* leave half spare, in case of emergency! */
	}

	return ret;
}
1723 | |
1724 | static int live_breadcrumbs_smoketest(void *arg) |
1725 | { |
1726 | struct drm_i915_private *i915 = arg; |
1727 | const unsigned int nengines = num_uabi_engines(i915); |
1728 | const unsigned int ncpus = /* saturate with nengines * ncpus */ |
1729 | max_t(int, 2, DIV_ROUND_UP(num_online_cpus(), nengines)); |
1730 | unsigned long num_waits, num_fences; |
1731 | struct intel_engine_cs *engine; |
1732 | struct smoke_thread *threads; |
1733 | struct igt_live_test live; |
1734 | intel_wakeref_t wakeref; |
1735 | struct smoketest *smoke; |
1736 | unsigned int n, idx; |
1737 | struct file *file; |
1738 | int ret = 0; |
1739 | |
1740 | /* |
1741 | * Smoketest our breadcrumb/signal handling for requests across multiple |
1742 | * threads. A very simple test to only catch the most egregious of bugs. |
1743 | * See __igt_breadcrumbs_smoketest(); |
1744 | * |
1745 | * On real hardware this time. |
1746 | */ |
1747 | |
1748 | wakeref = intel_runtime_pm_get(rpm: &i915->runtime_pm); |
1749 | |
1750 | file = mock_file(i915); |
1751 | if (IS_ERR(ptr: file)) { |
1752 | ret = PTR_ERR(ptr: file); |
1753 | goto out_rpm; |
1754 | } |
1755 | |
1756 | smoke = kcalloc(n: nengines, size: sizeof(*smoke), GFP_KERNEL); |
1757 | if (!smoke) { |
1758 | ret = -ENOMEM; |
1759 | goto out_file; |
1760 | } |
1761 | |
1762 | threads = kcalloc(n: ncpus * nengines, size: sizeof(*threads), GFP_KERNEL); |
1763 | if (!threads) { |
1764 | ret = -ENOMEM; |
1765 | goto out_smoke; |
1766 | } |
1767 | |
1768 | smoke[0].request_alloc = __live_request_alloc; |
1769 | smoke[0].ncontexts = 64; |
1770 | smoke[0].contexts = kcalloc(n: smoke[0].ncontexts, |
1771 | size: sizeof(*smoke[0].contexts), |
1772 | GFP_KERNEL); |
1773 | if (!smoke[0].contexts) { |
1774 | ret = -ENOMEM; |
1775 | goto out_threads; |
1776 | } |
1777 | |
1778 | for (n = 0; n < smoke[0].ncontexts; n++) { |
1779 | smoke[0].contexts[n] = live_context(i915, file); |
1780 | if (IS_ERR(ptr: smoke[0].contexts[n])) { |
1781 | ret = PTR_ERR(ptr: smoke[0].contexts[n]); |
1782 | goto out_contexts; |
1783 | } |
1784 | } |
1785 | |
1786 | ret = igt_live_test_begin(t: &live, i915, func: __func__, name: "" ); |
1787 | if (ret) |
1788 | goto out_contexts; |
1789 | |
1790 | idx = 0; |
1791 | for_each_uabi_engine(engine, i915) { |
1792 | smoke[idx] = smoke[0]; |
1793 | smoke[idx].engine = engine; |
1794 | smoke[idx].max_batch = |
1795 | max_batches(ctx: smoke[0].contexts[0], engine); |
1796 | if (smoke[idx].max_batch < 0) { |
1797 | ret = smoke[idx].max_batch; |
1798 | goto out_flush; |
1799 | } |
1800 | /* One ring interleaved between requests from all cpus */ |
1801 | smoke[idx].max_batch /= ncpus + 1; |
1802 | pr_debug("Limiting batches to %d requests on %s\n" , |
1803 | smoke[idx].max_batch, engine->name); |
1804 | |
1805 | for (n = 0; n < ncpus; n++) { |
1806 | unsigned int i = idx * ncpus + n; |
1807 | struct kthread_worker *worker; |
1808 | |
1809 | worker = kthread_create_worker(flags: 0, namefmt: "igt/%d.%d" , idx, n); |
1810 | if (IS_ERR(ptr: worker)) { |
1811 | ret = PTR_ERR(ptr: worker); |
1812 | goto out_flush; |
1813 | } |
1814 | |
1815 | threads[i].worker = worker; |
1816 | threads[i].t = &smoke[idx]; |
1817 | |
1818 | kthread_init_work(&threads[i].work, |
1819 | __igt_breadcrumbs_smoketest); |
1820 | kthread_queue_work(worker, work: &threads[i].work); |
1821 | } |
1822 | |
1823 | idx++; |
1824 | } |
1825 | |
1826 | msleep(msecs: jiffies_to_msecs(j: i915_selftest.timeout_jiffies)); |
1827 | |
1828 | out_flush: |
1829 | idx = 0; |
1830 | num_waits = 0; |
1831 | num_fences = 0; |
1832 | for_each_uabi_engine(engine, i915) { |
1833 | for (n = 0; n < ncpus; n++) { |
1834 | unsigned int i = idx * ncpus + n; |
1835 | int err; |
1836 | |
1837 | if (!threads[i].worker) |
1838 | continue; |
1839 | |
1840 | WRITE_ONCE(threads[i].stop, true); |
			kthread_flush_work(&threads[i].work);
1842 | err = READ_ONCE(threads[i].result); |
1843 | if (err < 0 && !ret) |
1844 | ret = err; |
1845 | |
			kthread_destroy_worker(threads[i].worker);
1847 | } |
1848 | |
		num_waits += atomic_long_read(&smoke[idx].num_waits);
		num_fences += atomic_long_read(&smoke[idx].num_fences);
1851 | idx++; |
1852 | } |
1853 | pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n" , |
1854 | num_waits, num_fences, idx, ncpus); |
1855 | |
	ret = igt_live_test_end(&live) ?: ret;
out_contexts:
	kfree(smoke[0].contexts);
out_threads:
	kfree(threads);
out_smoke:
	kfree(smoke);
out_file:
	fput(file);
out_rpm:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
1867 | |
1868 | return ret; |
1869 | } |
1870 | |
1871 | int i915_request_live_selftests(struct drm_i915_private *i915) |
1872 | { |
1873 | static const struct i915_subtest tests[] = { |
1874 | SUBTEST(live_nop_request), |
1875 | SUBTEST(live_all_engines), |
1876 | SUBTEST(live_sequential_engines), |
1877 | SUBTEST(live_parallel_engines), |
1878 | SUBTEST(live_empty_request), |
1879 | SUBTEST(live_cancel_request), |
1880 | SUBTEST(live_breadcrumbs_smoketest), |
1881 | }; |
1882 | |
	if (intel_gt_is_wedged(to_gt(i915)))
1884 | return 0; |
1885 | |
1886 | return i915_live_subtests(tests, i915); |
1887 | } |
1888 | |
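/*
 * Drain the context: queue a kernel request behind whatever is still
 * pending on @ce's timeline and wait for it, then flush submission
 * until the engine is idle. An earlier error, if any, is preserved.
 */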
1889 | static int switch_to_kernel_sync(struct intel_context *ce, int err) |
1890 | { |
1891 | struct i915_request *rq; |
1892 | struct dma_fence *fence; |
1893 | |
	rq = intel_engine_create_kernel_request(ce->engine);
	if (IS_ERR(rq))
		return PTR_ERR(rq);
1897 | |
	fence = i915_active_fence_get(&ce->timeline->last_request);
1899 | if (fence) { |
1900 | i915_request_await_dma_fence(rq, fence); |
1901 | dma_fence_put(fence); |
1902 | } |
1903 | |
1904 | rq = i915_request_get(rq); |
1905 | i915_request_add(rq); |
	if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err)
1907 | err = -ETIME; |
1908 | i915_request_put(rq); |
1909 | |
	while (!err && !intel_engine_is_idle(ce->engine))
		intel_engine_flush_submission(ce->engine);
1912 | |
1913 | return err; |
1914 | } |
1915 | |
1916 | struct perf_stats { |
1917 | struct intel_engine_cs *engine; |
1918 | unsigned long count; |
1919 | ktime_t time; |
1920 | ktime_t busy; |
1921 | u64 runtime; |
1922 | }; |
1923 | |
1924 | struct perf_series { |
1925 | struct drm_i915_private *i915; |
1926 | unsigned int nengines; |
1927 | struct intel_context *ce[] __counted_by(nengines); |
1928 | }; |
1929 | |
1930 | static int cmp_u32(const void *A, const void *B) |
1931 | { |
1932 | const u32 *a = A, *b = B; |
1933 | |
1934 | return *a - *b; |
1935 | } |
1936 | |
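/*
 * Note: trifilter() returns 4x a weighted median of TF_COUNT samples:
 * the extremes are discarded and the median is counted twice alongside
 * its two neighbours. As a purely illustrative example, sorted samples
 * { 10, 12, 13, 14, 90 } give 12 + 2*13 + 14 = 52; the reports below
 * shift right by TF_BIAS (divide by 4) to print ~13, so the outlier 90
 * never contributes.
 */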
1937 | static u32 trifilter(u32 *a) |
1938 | { |
1939 | u64 sum; |
1940 | |
1941 | #define TF_COUNT 5 |
	sort(a, TF_COUNT, sizeof(*a), cmp_u32, NULL);
1943 | |
	sum = mul_u32_u32(a[2], 2);
1945 | sum += a[1]; |
1946 | sum += a[3]; |
1947 | |
1948 | GEM_BUG_ON(sum > U32_MAX); |
1949 | return sum; |
1950 | #define TF_BIAS 2 |
1951 | } |
1952 | |
1953 | static u64 cycles_to_ns(struct intel_engine_cs *engine, u32 cycles) |
1954 | { |
	u64 ns = intel_gt_clock_interval_to_ns(engine->gt, cycles);
1956 | |
1957 | return DIV_ROUND_CLOSEST(ns, 1 << TF_BIAS); |
1958 | } |
1959 | |
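/*
 * Command-stream emitters shared by the latency probes below: store
 * the engine's CS_TIMESTAMP register to a dword in the GGTT (SRM),
 * store an immediate dword, and poll a dword with MI_SEMAPHORE_WAIT
 * until the requested comparison is satisfied.
 */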
1960 | static u32 *emit_timestamp_store(u32 *cs, struct intel_context *ce, u32 offset) |
1961 | { |
1962 | *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; |
1963 | *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP((ce->engine->mmio_base))); |
1964 | *cs++ = offset; |
1965 | *cs++ = 0; |
1966 | |
1967 | return cs; |
1968 | } |
1969 | |
1970 | static u32 *emit_store_dw(u32 *cs, u32 offset, u32 value) |
1971 | { |
1972 | *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; |
1973 | *cs++ = offset; |
1974 | *cs++ = 0; |
1975 | *cs++ = value; |
1976 | |
1977 | return cs; |
1978 | } |
1979 | |
1980 | static u32 *emit_semaphore_poll(u32 *cs, u32 mode, u32 value, u32 offset) |
1981 | { |
1982 | *cs++ = MI_SEMAPHORE_WAIT | |
1983 | MI_SEMAPHORE_GLOBAL_GTT | |
1984 | MI_SEMAPHORE_POLL | |
1985 | mode; |
1986 | *cs++ = value; |
1987 | *cs++ = offset; |
1988 | *cs++ = 0; |
1989 | |
1990 | return cs; |
1991 | } |
1992 | |
1993 | static u32 *emit_semaphore_poll_until(u32 *cs, u32 offset, u32 value) |
1994 | { |
1995 | return emit_semaphore_poll(cs, MI_SEMAPHORE_SAD_EQ_SDD, value, offset); |
1996 | } |
1997 | |
1998 | static void semaphore_set(u32 *sema, u32 value) |
1999 | { |
2000 | WRITE_ONCE(*sema, value); |
2001 | wmb(); /* flush the update to the cache, and beyond */ |
2002 | } |
2003 | |
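/*
 * Reuse a stretch of the engine's status page as CPU/GPU shared
 * scratch: dword 0 of the block is the semaphore the CPU pokes, and
 * the following dwords collect the timestamps written by the GPU.
 */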
2004 | static u32 *hwsp_scratch(const struct intel_context *ce) |
2005 | { |
	return memset32(ce->engine->status_page.addr + 1000, 0, 21);
2007 | } |
2008 | |
2009 | static u32 hwsp_offset(const struct intel_context *ce, u32 *dw) |
2010 | { |
	return (i915_ggtt_offset(ce->engine->status_page.vma) +
2012 | offset_in_page(dw)); |
2013 | } |
2014 | |
2015 | static int measure_semaphore_response(struct intel_context *ce) |
2016 | { |
2017 | u32 *sema = hwsp_scratch(ce); |
	const u32 offset = hwsp_offset(ce, sema);
2019 | u32 elapsed[TF_COUNT], cycles; |
2020 | struct i915_request *rq; |
2021 | u32 *cs; |
2022 | int err; |
2023 | int i; |
2024 | |
2025 | /* |
2026 | * Measure how many cycles it takes for the HW to detect the change |
2027 | * in a semaphore value. |
2028 | * |
2029 | * A: read CS_TIMESTAMP from CPU |
2030 | * poke semaphore |
2031 | * B: read CS_TIMESTAMP on GPU |
2032 | * |
2033 | * Semaphore latency: B - A |
2034 | */ |
2035 | |
	semaphore_set(sema, -1);
2037 | |
	rq = i915_request_create(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 4 + 12 * ARRAY_SIZE(elapsed));
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		err = PTR_ERR(cs);
		goto err;
	}

	cs = emit_store_dw(cs, offset, 0);
	for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
		cs = emit_semaphore_poll_until(cs, offset, i);
		cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
		cs = emit_store_dw(cs, offset, 0);
2054 | } |
2055 | |
2056 | intel_ring_advance(rq, cs); |
2057 | i915_request_add(rq); |
2058 | |
2059 | if (wait_for(READ_ONCE(*sema) == 0, 50)) { |
2060 | err = -EIO; |
2061 | goto err; |
2062 | } |
2063 | |
2064 | for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { |
2065 | preempt_disable(); |
2066 | cycles = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); |
		semaphore_set(sema, i);
2068 | preempt_enable(); |
2069 | |
2070 | if (wait_for(READ_ONCE(*sema) == 0, 50)) { |
2071 | err = -EIO; |
2072 | goto err; |
2073 | } |
2074 | |
2075 | elapsed[i - 1] = sema[i] - cycles; |
2076 | } |
2077 | |
	cycles = trifilter(elapsed);
	pr_info("%s: semaphore response %d cycles, %lluns\n",
2080 | ce->engine->name, cycles >> TF_BIAS, |
2081 | cycles_to_ns(ce->engine, cycles)); |
2082 | |
	return intel_gt_wait_for_idle(ce->engine->gt, HZ);
2084 | |
2085 | err: |
	intel_gt_set_wedged(ce->engine->gt);
2087 | return err; |
2088 | } |
2089 | |
2090 | static int measure_idle_dispatch(struct intel_context *ce) |
2091 | { |
2092 | u32 *sema = hwsp_scratch(ce); |
	const u32 offset = hwsp_offset(ce, sema);
2094 | u32 elapsed[TF_COUNT], cycles; |
2095 | u32 *cs; |
2096 | int err; |
2097 | int i; |
2098 | |
2099 | /* |
2100 | * Measure how long it takes for us to submit a request while the |
2101 | * engine is idle, but is resting in our context. |
2102 | * |
2103 | * A: read CS_TIMESTAMP from CPU |
2104 | * submit request |
2105 | * B: read CS_TIMESTAMP on GPU |
2106 | * |
2107 | * Submission latency: B - A |
2108 | */ |
2109 | |
2110 | for (i = 0; i < ARRAY_SIZE(elapsed); i++) { |
2111 | struct i915_request *rq; |
2112 | |
		err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
		if (err)
			return err;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		cs = intel_ring_begin(rq, 4);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err;
		}

		cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
2131 | |
2132 | intel_ring_advance(rq, cs); |
2133 | |
2134 | preempt_disable(); |
2135 | local_bh_disable(); |
2136 | elapsed[i] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); |
2137 | i915_request_add(rq); |
2138 | local_bh_enable(); |
2139 | preempt_enable(); |
2140 | } |
2141 | |
	err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
2143 | if (err) |
2144 | goto err; |
2145 | |
2146 | for (i = 0; i < ARRAY_SIZE(elapsed); i++) |
2147 | elapsed[i] = sema[i] - elapsed[i]; |
2148 | |
	cycles = trifilter(elapsed);
	pr_info("%s: idle dispatch latency %d cycles, %lluns\n",
2151 | ce->engine->name, cycles >> TF_BIAS, |
2152 | cycles_to_ns(ce->engine, cycles)); |
2153 | |
	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err:
	intel_gt_set_wedged(ce->engine->gt);
2158 | return err; |
2159 | } |
2160 | |
2161 | static int measure_busy_dispatch(struct intel_context *ce) |
2162 | { |
2163 | u32 *sema = hwsp_scratch(ce); |
	const u32 offset = hwsp_offset(ce, sema);
2165 | u32 elapsed[TF_COUNT + 1], cycles; |
2166 | u32 *cs; |
2167 | int err; |
2168 | int i; |
2169 | |
2170 | /* |
2171 | * Measure how long it takes for us to submit a request while the |
2172 | * engine is busy, polling on a semaphore in our context. With |
2173 | * direct submission, this will include the cost of a lite restore. |
2174 | * |
2175 | * A: read CS_TIMESTAMP from CPU |
2176 | * submit request |
2177 | * B: read CS_TIMESTAMP on GPU |
2178 | * |
2179 | * Submission latency: B - A |
2180 | */ |
2181 | |
2182 | for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { |
2183 | struct i915_request *rq; |
2184 | |
		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		cs = intel_ring_begin(rq, 12);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err;
		}

		cs = emit_store_dw(cs, offset + i * sizeof(u32), -1);
		cs = emit_semaphore_poll_until(cs, offset, i);
		cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
2201 | |
2202 | intel_ring_advance(rq, cs); |
2203 | |
2204 | if (i > 1 && wait_for(READ_ONCE(sema[i - 1]), 500)) { |
2205 | err = -EIO; |
2206 | goto err; |
2207 | } |
2208 | |
2209 | preempt_disable(); |
2210 | local_bh_disable(); |
2211 | elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); |
2212 | i915_request_add(rq); |
2213 | local_bh_enable(); |
		semaphore_set(sema, i - 1);
2215 | preempt_enable(); |
2216 | } |
2217 | |
2218 | wait_for(READ_ONCE(sema[i - 1]), 500); |
	semaphore_set(sema, i - 1);
2220 | |
2221 | for (i = 1; i <= TF_COUNT; i++) { |
2222 | GEM_BUG_ON(sema[i] == -1); |
2223 | elapsed[i - 1] = sema[i] - elapsed[i]; |
2224 | } |
2225 | |
	cycles = trifilter(elapsed);
	pr_info("%s: busy dispatch latency %d cycles, %lluns\n",
2228 | ce->engine->name, cycles >> TF_BIAS, |
2229 | cycles_to_ns(ce->engine, cycles)); |
2230 | |
	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err:
	intel_gt_set_wedged(ce->engine->gt);
2235 | return err; |
2236 | } |
2237 | |
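/*
 * Plug the engine: submit a kernel-context request that spins on a
 * semaphore in the status page, holding back everything queued behind
 * it until the CPU releases it with semaphore_set().
 */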
2238 | static int plug(struct intel_engine_cs *engine, u32 *sema, u32 mode, int value) |
2239 | { |
2240 | const u32 offset = |
		i915_ggtt_offset(engine->status_page.vma) +
2242 | offset_in_page(sema); |
2243 | struct i915_request *rq; |
2244 | u32 *cs; |
2245 | |
	rq = i915_request_create(engine->kernel_context);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}
2254 | } |
2255 | |
2256 | cs = emit_semaphore_poll(cs, mode, value, offset); |
2257 | |
2258 | intel_ring_advance(rq, cs); |
2259 | i915_request_add(rq); |
2260 | |
2261 | return 0; |
2262 | } |
2263 | |
2264 | static int measure_inter_request(struct intel_context *ce) |
2265 | { |
2266 | u32 *sema = hwsp_scratch(ce); |
	const u32 offset = hwsp_offset(ce, sema);
2268 | u32 elapsed[TF_COUNT + 1], cycles; |
2269 | struct i915_sw_fence *submit; |
2270 | int i, err; |
2271 | |
2272 | /* |
2273 | * Measure how long it takes to advance from one request into the |
2274 | * next. Between each request we flush the GPU caches to memory, |
2275 | * update the breadcrumbs, and then invalidate those caches. |
2276 | * We queue up all the requests to be submitted in one batch so |
2277 | * it should be one set of contiguous measurements. |
2278 | * |
2279 | * A: read CS_TIMESTAMP on GPU |
2280 | * advance request |
2281 | * B: read CS_TIMESTAMP on GPU |
2282 | * |
2283 | * Request latency: B - A |
2284 | */ |
2285 | |
	err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0);
2287 | if (err) |
2288 | return err; |
2289 | |
2290 | submit = heap_fence_create(GFP_KERNEL); |
2291 | if (!submit) { |
		semaphore_set(sema, 1);
2293 | return -ENOMEM; |
2294 | } |
2295 | |
	intel_engine_flush_submission(ce->engine);
2297 | for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { |
2298 | struct i915_request *rq; |
2299 | u32 *cs; |
2300 | |
		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_submit;
		}

		err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
						       submit,
						       GFP_KERNEL);
		if (err < 0) {
			i915_request_add(rq);
			goto err_submit;
		}

		cs = intel_ring_begin(rq, 4);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err_submit;
		}

		cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
2323 | |
2324 | intel_ring_advance(rq, cs); |
2325 | i915_request_add(rq); |
2326 | } |
	i915_sw_fence_commit(submit);
	intel_engine_flush_submission(ce->engine);
	heap_fence_put(submit);

	semaphore_set(sema, 1);
	err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
2333 | if (err) |
2334 | goto err; |
2335 | |
2336 | for (i = 1; i <= TF_COUNT; i++) |
2337 | elapsed[i - 1] = sema[i + 1] - sema[i]; |
2338 | |
	cycles = trifilter(elapsed);
	pr_info("%s: inter-request latency %d cycles, %lluns\n",
2341 | ce->engine->name, cycles >> TF_BIAS, |
2342 | cycles_to_ns(ce->engine, cycles)); |
2343 | |
	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err_submit:
	i915_sw_fence_commit(submit);
	heap_fence_put(submit);
	semaphore_set(sema, 1);
err:
	intel_gt_set_wedged(ce->engine->gt);
2352 | return err; |
2353 | } |
2354 | |
2355 | static int measure_context_switch(struct intel_context *ce) |
2356 | { |
2357 | u32 *sema = hwsp_scratch(ce); |
	const u32 offset = hwsp_offset(ce, sema);
2359 | struct i915_request *fence = NULL; |
2360 | u32 elapsed[TF_COUNT + 1], cycles; |
2361 | int i, j, err; |
2362 | u32 *cs; |
2363 | |
2364 | /* |
2365 | * Measure how long it takes to advance from one request in one |
2366 | * context to a request in another context. This allows us to |
2367 | * measure how long the context save/restore take, along with all |
2368 | * the inter-context setup we require. |
2369 | * |
2370 | * A: read CS_TIMESTAMP on GPU |
2371 | * switch context |
2372 | * B: read CS_TIMESTAMP on GPU |
2373 | * |
2374 | * Context switch latency: B - A |
2375 | */ |
2376 | |
	err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0);
2378 | if (err) |
2379 | return err; |
2380 | |
2381 | for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { |
2382 | struct intel_context *arr[] = { |
2383 | ce, ce->engine->kernel_context |
2384 | }; |
2385 | u32 addr = offset + ARRAY_SIZE(arr) * i * sizeof(u32); |
2386 | |
2387 | for (j = 0; j < ARRAY_SIZE(arr); j++) { |
2388 | struct i915_request *rq; |
2389 | |
			rq = i915_request_create(arr[j]);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto err_fence;
			}

			if (fence) {
				err = i915_request_await_dma_fence(rq,
								   &fence->fence);
				if (err) {
					i915_request_add(rq);
					goto err_fence;
				}
			}

			cs = intel_ring_begin(rq, 4);
			if (IS_ERR(cs)) {
				i915_request_add(rq);
				err = PTR_ERR(cs);
				goto err_fence;
			}

			cs = emit_timestamp_store(cs, ce, addr);
			addr += sizeof(u32);

			intel_ring_advance(rq, cs);

			i915_request_put(fence);
			fence = i915_request_get(rq);

			i915_request_add(rq);
		}
	}
	i915_request_put(fence);
	intel_engine_flush_submission(ce->engine);

	semaphore_set(sema, 1);
	err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
2428 | if (err) |
2429 | goto err; |
2430 | |
2431 | for (i = 1; i <= TF_COUNT; i++) |
2432 | elapsed[i - 1] = sema[2 * i + 2] - sema[2 * i + 1]; |
2433 | |
	cycles = trifilter(elapsed);
	pr_info("%s: context switch latency %d cycles, %lluns\n",
2436 | ce->engine->name, cycles >> TF_BIAS, |
2437 | cycles_to_ns(ce->engine, cycles)); |
2438 | |
	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err_fence:
	i915_request_put(fence);
	semaphore_set(sema, 1);
err:
	intel_gt_set_wedged(ce->engine->gt);
2446 | return err; |
2447 | } |
2448 | |
2449 | static int measure_preemption(struct intel_context *ce) |
2450 | { |
2451 | u32 *sema = hwsp_scratch(ce); |
	const u32 offset = hwsp_offset(ce, sema);
2453 | u32 elapsed[TF_COUNT], cycles; |
2454 | u32 *cs; |
2455 | int err; |
2456 | int i; |
2457 | |
2458 | /* |
2459 | * We measure two latencies while triggering preemption. The first |
2460 | * latency is how long it takes for us to submit a preempting request. |
	 * The second latency is how long it takes for us to return from the
2462 | * preemption back to the original context. |
2463 | * |
2464 | * A: read CS_TIMESTAMP from CPU |
2465 | * submit preemption |
2466 | * B: read CS_TIMESTAMP on GPU (in preempting context) |
2467 | * context switch |
2468 | * C: read CS_TIMESTAMP on GPU (in original context) |
2469 | * |
2470 | * Preemption dispatch latency: B - A |
2471 | * Preemption switch latency: C - B |
2472 | */ |
2473 | |
	if (!intel_engine_has_preemption(ce->engine))
2475 | return 0; |
2476 | |
2477 | for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { |
2478 | u32 addr = offset + 2 * i * sizeof(u32); |
2479 | struct i915_request *rq; |
2480 | |
		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		cs = intel_ring_begin(rq, 12);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err;
		}

		cs = emit_store_dw(cs, addr, -1);
		cs = emit_semaphore_poll_until(cs, offset, i);
		cs = emit_timestamp_store(cs, ce, addr + sizeof(u32));
2497 | |
2498 | intel_ring_advance(rq, cs); |
2499 | i915_request_add(rq); |
2500 | |
2501 | if (wait_for(READ_ONCE(sema[2 * i]) == -1, 500)) { |
2502 | err = -EIO; |
2503 | goto err; |
2504 | } |
2505 | |
		rq = i915_request_create(ce->engine->kernel_context);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		cs = intel_ring_begin(rq, 8);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err;
		}

		cs = emit_timestamp_store(cs, ce, addr);
		cs = emit_store_dw(cs, offset, i);
2521 | |
2522 | intel_ring_advance(rq, cs); |
2523 | rq->sched.attr.priority = I915_PRIORITY_BARRIER; |
2524 | |
2525 | elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); |
2526 | i915_request_add(rq); |
2527 | } |
2528 | |
2529 | if (wait_for(READ_ONCE(sema[2 * i - 2]) != -1, 500)) { |
2530 | err = -EIO; |
2531 | goto err; |
2532 | } |
2533 | |
2534 | for (i = 1; i <= TF_COUNT; i++) |
2535 | elapsed[i - 1] = sema[2 * i + 0] - elapsed[i - 1]; |
2536 | |
	cycles = trifilter(elapsed);
	pr_info("%s: preemption dispatch latency %d cycles, %lluns\n",
2539 | ce->engine->name, cycles >> TF_BIAS, |
2540 | cycles_to_ns(ce->engine, cycles)); |
2541 | |
2542 | for (i = 1; i <= TF_COUNT; i++) |
2543 | elapsed[i - 1] = sema[2 * i + 1] - sema[2 * i + 0]; |
2544 | |
	cycles = trifilter(elapsed);
	pr_info("%s: preemption switch latency %d cycles, %lluns\n",
2547 | ce->engine->name, cycles >> TF_BIAS, |
2548 | cycles_to_ns(ce->engine, cycles)); |
2549 | |
	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err:
	intel_gt_set_wedged(ce->engine->gt);
2554 | return err; |
2555 | } |
2556 | |
2557 | struct signal_cb { |
2558 | struct dma_fence_cb base; |
2559 | bool seen; |
2560 | }; |
2561 | |
2562 | static void signal_cb(struct dma_fence *fence, struct dma_fence_cb *cb) |
2563 | { |
2564 | struct signal_cb *s = container_of(cb, typeof(*s), base); |
2565 | |
2566 | smp_store_mb(s->seen, true); /* be safe, be strong */ |
2567 | } |
2568 | |
2569 | static int measure_completion(struct intel_context *ce) |
2570 | { |
2571 | u32 *sema = hwsp_scratch(ce); |
	const u32 offset = hwsp_offset(ce, sema);
2573 | u32 elapsed[TF_COUNT], cycles; |
2574 | u32 *cs; |
2575 | int err; |
2576 | int i; |
2577 | |
2578 | /* |
	 * Measure how long it takes for the signal (interrupt) sent
	 * from the GPU to be processed by the CPU.
2581 | * |
2582 | * A: read CS_TIMESTAMP on GPU |
2583 | * signal |
2584 | * B: read CS_TIMESTAMP from CPU |
2585 | * |
2586 | * Completion latency: B - A |
2587 | */ |
2588 | |
2589 | for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { |
2590 | struct signal_cb cb = { .seen = false }; |
2591 | struct i915_request *rq; |
2592 | |
		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		cs = intel_ring_begin(rq, 12);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err;
		}

		cs = emit_store_dw(cs, offset + i * sizeof(u32), -1);
		cs = emit_semaphore_poll_until(cs, offset, i);
		cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
2609 | |
2610 | intel_ring_advance(rq, cs); |
2611 | |
		dma_fence_add_callback(&rq->fence, &cb.base, signal_cb);
2613 | i915_request_add(rq); |
2614 | |
		intel_engine_flush_submission(ce->engine);
2616 | if (wait_for(READ_ONCE(sema[i]) == -1, 50)) { |
2617 | err = -EIO; |
2618 | goto err; |
2619 | } |
2620 | |
2621 | preempt_disable(); |
		semaphore_set(sema, i);
2623 | while (!READ_ONCE(cb.seen)) |
2624 | cpu_relax(); |
2625 | |
2626 | elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); |
2627 | preempt_enable(); |
2628 | } |
2629 | |
	err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
2631 | if (err) |
2632 | goto err; |
2633 | |
2634 | for (i = 0; i < ARRAY_SIZE(elapsed); i++) { |
2635 | GEM_BUG_ON(sema[i + 1] == -1); |
2636 | elapsed[i] = elapsed[i] - sema[i + 1]; |
2637 | } |
2638 | |
	cycles = trifilter(elapsed);
	pr_info("%s: completion latency %d cycles, %lluns\n",
2641 | ce->engine->name, cycles >> TF_BIAS, |
2642 | cycles_to_ns(ce->engine, cycles)); |
2643 | |
	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err:
	intel_gt_set_wedged(ce->engine->gt);
2648 | return err; |
2649 | } |
2650 | |
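/*
 * Pin the GPU clocks at maximum (holding forcewake) while sampling,
 * with the intent that frequency ramping does not distort the
 * latencies measured below.
 */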
2651 | static void rps_pin(struct intel_gt *gt) |
2652 | { |
2653 | /* Pin the frequency to max */ |
	atomic_inc(&gt->rps.num_waiters);
	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

	mutex_lock(&gt->rps.lock);
	intel_rps_set(&gt->rps, gt->rps.max_freq);
	mutex_unlock(&gt->rps.lock);
2660 | } |
2661 | |
2662 | static void rps_unpin(struct intel_gt *gt) |
2663 | { |
	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
	atomic_dec(&gt->rps.num_waiters);
2666 | } |
2667 | |
2668 | static int perf_request_latency(void *arg) |
2669 | { |
2670 | struct drm_i915_private *i915 = arg; |
2671 | struct intel_engine_cs *engine; |
2672 | struct pm_qos_request qos; |
2673 | int err = 0; |
2674 | |
2675 | if (GRAPHICS_VER(i915) < 8) /* per-engine CS timestamp, semaphores */ |
2676 | return 0; |
2677 | |
	cpu_latency_qos_add_request(&qos, 0); /* disable cstates */
2679 | |
2680 | for_each_uabi_engine(engine, i915) { |
2681 | struct intel_context *ce; |
2682 | |
2683 | ce = intel_context_create(engine); |
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
2686 | goto out; |
2687 | } |
2688 | |
2689 | err = intel_context_pin(ce); |
2690 | if (err) { |
2691 | intel_context_put(ce); |
2692 | goto out; |
2693 | } |
2694 | |
2695 | st_engine_heartbeat_disable(engine); |
		rps_pin(engine->gt);
2697 | |
2698 | if (err == 0) |
2699 | err = measure_semaphore_response(ce); |
2700 | if (err == 0) |
2701 | err = measure_idle_dispatch(ce); |
2702 | if (err == 0) |
2703 | err = measure_busy_dispatch(ce); |
2704 | if (err == 0) |
2705 | err = measure_inter_request(ce); |
2706 | if (err == 0) |
2707 | err = measure_context_switch(ce); |
2708 | if (err == 0) |
2709 | err = measure_preemption(ce); |
2710 | if (err == 0) |
2711 | err = measure_completion(ce); |
2712 | |
		rps_unpin(engine->gt);
2714 | st_engine_heartbeat_enable(engine); |
2715 | |
2716 | intel_context_unpin(ce); |
2717 | intel_context_put(ce); |
2718 | if (err) |
2719 | goto out; |
2720 | } |
2721 | |
2722 | out: |
2723 | if (igt_flush_test(i915)) |
2724 | err = -EIO; |
2725 | |
	cpu_latency_qos_remove_request(&qos);
2727 | return err; |
2728 | } |
2729 | |
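/* Fully synchronous series: wait for each request before the next. */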
2730 | static int s_sync0(void *arg) |
2731 | { |
2732 | struct perf_series *ps = arg; |
2733 | IGT_TIMEOUT(end_time); |
2734 | unsigned int idx = 0; |
2735 | int err = 0; |
2736 | |
2737 | GEM_BUG_ON(!ps->nengines); |
2738 | do { |
2739 | struct i915_request *rq; |
2740 | |
		rq = i915_request_create(ps->ce[idx]);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
2744 | break; |
2745 | } |
2746 | |
2747 | i915_request_get(rq); |
2748 | i915_request_add(rq); |
2749 | |
		if (i915_request_wait(rq, 0, HZ / 5) < 0)
2751 | err = -ETIME; |
2752 | i915_request_put(rq); |
2753 | if (err) |
2754 | break; |
2755 | |
2756 | if (++idx == ps->nengines) |
2757 | idx = 0; |
	} while (!__igt_timeout(end_time, NULL));
2759 | |
2760 | return err; |
2761 | } |
2762 | |
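/* Pipelined by one: wait on request N-1 while request N is in flight. */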
2763 | static int s_sync1(void *arg) |
2764 | { |
2765 | struct perf_series *ps = arg; |
2766 | struct i915_request *prev = NULL; |
2767 | IGT_TIMEOUT(end_time); |
2768 | unsigned int idx = 0; |
2769 | int err = 0; |
2770 | |
2771 | GEM_BUG_ON(!ps->nengines); |
2772 | do { |
2773 | struct i915_request *rq; |
2774 | |
		rq = i915_request_create(ps->ce[idx]);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
2778 | break; |
2779 | } |
2780 | |
2781 | i915_request_get(rq); |
2782 | i915_request_add(rq); |
2783 | |
		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(prev);
2787 | prev = rq; |
2788 | if (err) |
2789 | break; |
2790 | |
2791 | if (++idx == ps->nengines) |
2792 | idx = 0; |
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(prev);
2795 | |
2796 | return err; |
2797 | } |
2798 | |
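/* Fire-and-forget: keep submitting without ever waiting. */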
2799 | static int s_many(void *arg) |
2800 | { |
2801 | struct perf_series *ps = arg; |
2802 | IGT_TIMEOUT(end_time); |
2803 | unsigned int idx = 0; |
2804 | |
2805 | GEM_BUG_ON(!ps->nengines); |
2806 | do { |
2807 | struct i915_request *rq; |
2808 | |
		rq = i915_request_create(ps->ce[idx]);
		if (IS_ERR(rq))
			return PTR_ERR(rq);
2812 | |
2813 | i915_request_add(rq); |
2814 | |
2815 | if (++idx == ps->nengines) |
2816 | idx = 0; |
	} while (!__igt_timeout(end_time, NULL));
2818 | |
2819 | return 0; |
2820 | } |
2821 | |
2822 | static int perf_series_engines(void *arg) |
2823 | { |
2824 | struct drm_i915_private *i915 = arg; |
2825 | static int (* const func[])(void *arg) = { |
2826 | s_sync0, |
2827 | s_sync1, |
2828 | s_many, |
2829 | NULL, |
2830 | }; |
2831 | const unsigned int nengines = num_uabi_engines(i915); |
2832 | struct intel_engine_cs *engine; |
2833 | int (* const *fn)(void *arg); |
2834 | struct pm_qos_request qos; |
2835 | struct perf_stats *stats; |
2836 | struct perf_series *ps; |
2837 | unsigned int idx; |
2838 | int err = 0; |
2839 | |
	stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL);
	if (!stats)
		return -ENOMEM;

	ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
	if (!ps) {
		kfree(stats);
2847 | return -ENOMEM; |
2848 | } |
2849 | |
	cpu_latency_qos_add_request(&qos, 0); /* disable cstates */
2851 | |
2852 | ps->i915 = i915; |
2853 | ps->nengines = nengines; |
2854 | |
2855 | idx = 0; |
2856 | for_each_uabi_engine(engine, i915) { |
2857 | struct intel_context *ce; |
2858 | |
2859 | ce = intel_context_create(engine); |
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
2862 | goto out; |
2863 | } |
2864 | |
2865 | err = intel_context_pin(ce); |
2866 | if (err) { |
2867 | intel_context_put(ce); |
2868 | goto out; |
2869 | } |
2870 | |
2871 | ps->ce[idx++] = ce; |
2872 | } |
2873 | GEM_BUG_ON(idx != ps->nengines); |
2874 | |
2875 | for (fn = func; *fn && !err; fn++) { |
2876 | char name[KSYM_NAME_LEN]; |
2877 | struct igt_live_test t; |
2878 | |
		snprintf(name, sizeof(name), "%ps", *fn);
		err = igt_live_test_begin(&t, i915, __func__, name);
2881 | if (err) |
2882 | break; |
2883 | |
2884 | for (idx = 0; idx < nengines; idx++) { |
2885 | struct perf_stats *p = |
2886 | memset(&stats[idx], 0, sizeof(stats[idx])); |
2887 | struct intel_context *ce = ps->ce[idx]; |
2888 | |
2889 | p->engine = ps->ce[idx]->engine; |
			intel_engine_pm_get(p->engine);

			if (intel_engine_supports_stats(p->engine))
				p->busy = intel_engine_get_busy_time(p->engine,
								     &p->time) + 1;
2895 | else |
2896 | p->time = ktime_get(); |
2897 | p->runtime = -intel_context_get_total_runtime_ns(ce); |
2898 | } |
2899 | |
2900 | err = (*fn)(ps); |
		if (igt_live_test_end(&t))
2902 | err = -EIO; |
2903 | |
2904 | for (idx = 0; idx < nengines; idx++) { |
2905 | struct perf_stats *p = &stats[idx]; |
2906 | struct intel_context *ce = ps->ce[idx]; |
2907 | int integer, decimal; |
2908 | u64 busy, dt, now; |
2909 | |
2910 | if (p->busy) |
2911 | p->busy = ktime_sub(intel_engine_get_busy_time(p->engine, |
2912 | &now), |
2913 | p->busy - 1); |
2914 | else |
2915 | now = ktime_get(); |
2916 | p->time = ktime_sub(now, p->time); |
2917 | |
2918 | err = switch_to_kernel_sync(ce, err); |
2919 | p->runtime += intel_context_get_total_runtime_ns(ce); |
			intel_engine_pm_put(p->engine);

			busy = 100 * ktime_to_ns(p->busy);
			dt = ktime_to_ns(p->time);
			if (dt) {
				integer = div64_u64(busy, dt);
				busy -= integer * dt;
				decimal = div64_u64(100 * busy, dt);
2928 | } else { |
2929 | integer = 0; |
2930 | decimal = 0; |
2931 | } |
2932 | |
2933 | pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n" , |
2934 | name, p->engine->name, ce->timeline->seqno, |
2935 | integer, decimal, |
2936 | div_u64(p->runtime, 1000 * 1000), |
2937 | div_u64(ktime_to_ns(p->time), 1000 * 1000)); |
2938 | } |
2939 | } |
2940 | |
2941 | out: |
2942 | for (idx = 0; idx < nengines; idx++) { |
		if (IS_ERR_OR_NULL(ps->ce[idx]))
			break;

		intel_context_unpin(ps->ce[idx]);
		intel_context_put(ps->ce[idx]);
	}
	kfree(ps);

	cpu_latency_qos_remove_request(&qos);
	kfree(stats);
2953 | return err; |
2954 | } |
2955 | |
2956 | struct p_thread { |
2957 | struct perf_stats p; |
2958 | struct kthread_worker *worker; |
2959 | struct kthread_work work; |
2960 | struct intel_engine_cs *engine; |
2961 | int result; |
2962 | }; |
2963 | |
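/*
 * Per-engine worker mirroring s_sync0: create a context on the engine
 * and count how many submit+wait cycles complete within the timeout.
 */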
2964 | static void p_sync0(struct kthread_work *work) |
2965 | { |
2966 | struct p_thread *thread = container_of(work, typeof(*thread), work); |
2967 | struct perf_stats *p = &thread->p; |
2968 | struct intel_engine_cs *engine = p->engine; |
2969 | struct intel_context *ce; |
2970 | IGT_TIMEOUT(end_time); |
2971 | unsigned long count; |
2972 | bool busy; |
2973 | int err = 0; |
2974 | |
2975 | ce = intel_context_create(engine); |
	if (IS_ERR(ce)) {
		thread->result = PTR_ERR(ce);
2978 | return; |
2979 | } |
2980 | |
2981 | err = intel_context_pin(ce); |
2982 | if (err) { |
2983 | intel_context_put(ce); |
2984 | thread->result = err; |
2985 | return; |
2986 | } |
2987 | |
2988 | if (intel_engine_supports_stats(engine)) { |
		p->busy = intel_engine_get_busy_time(engine, &p->time);
2990 | busy = true; |
2991 | } else { |
2992 | p->time = ktime_get(); |
2993 | busy = false; |
2994 | } |
2995 | |
2996 | count = 0; |
2997 | do { |
2998 | struct i915_request *rq; |
2999 | |
3000 | rq = i915_request_create(ce); |
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
3003 | break; |
3004 | } |
3005 | |
3006 | i915_request_get(rq); |
3007 | i915_request_add(rq); |
3008 | |
3009 | err = 0; |
		if (i915_request_wait(rq, 0, HZ) < 0)
3011 | err = -ETIME; |
3012 | i915_request_put(rq); |
3013 | if (err) |
3014 | break; |
3015 | |
3016 | count++; |
	} while (!__igt_timeout(end_time, NULL));
3018 | |
3019 | if (busy) { |
3020 | ktime_t now; |
3021 | |
3022 | p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now), |
3023 | p->busy); |
3024 | p->time = ktime_sub(now, p->time); |
3025 | } else { |
3026 | p->time = ktime_sub(ktime_get(), p->time); |
3027 | } |
3028 | |
3029 | err = switch_to_kernel_sync(ce, err); |
3030 | p->runtime = intel_context_get_total_runtime_ns(ce); |
3031 | p->count = count; |
3032 | |
3033 | intel_context_unpin(ce); |
3034 | intel_context_put(ce); |
3035 | thread->result = err; |
3036 | } |
3037 | |
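/*
 * Per-engine worker mirroring s_sync1: keep one request in flight
 * while waiting on its predecessor.
 */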
3038 | static void p_sync1(struct kthread_work *work) |
3039 | { |
3040 | struct p_thread *thread = container_of(work, typeof(*thread), work); |
3041 | struct perf_stats *p = &thread->p; |
3042 | struct intel_engine_cs *engine = p->engine; |
3043 | struct i915_request *prev = NULL; |
3044 | struct intel_context *ce; |
3045 | IGT_TIMEOUT(end_time); |
3046 | unsigned long count; |
3047 | bool busy; |
3048 | int err = 0; |
3049 | |
3050 | ce = intel_context_create(engine); |
	if (IS_ERR(ce)) {
		thread->result = PTR_ERR(ce);
3053 | return; |
3054 | } |
3055 | |
3056 | err = intel_context_pin(ce); |
3057 | if (err) { |
3058 | intel_context_put(ce); |
3059 | thread->result = err; |
3060 | return; |
3061 | } |
3062 | |
3063 | if (intel_engine_supports_stats(engine)) { |
		p->busy = intel_engine_get_busy_time(engine, &p->time);
3065 | busy = true; |
3066 | } else { |
3067 | p->time = ktime_get(); |
3068 | busy = false; |
3069 | } |
3070 | |
3071 | count = 0; |
3072 | do { |
3073 | struct i915_request *rq; |
3074 | |
3075 | rq = i915_request_create(ce); |
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
3078 | break; |
3079 | } |
3080 | |
3081 | i915_request_get(rq); |
3082 | i915_request_add(rq); |
3083 | |
3084 | err = 0; |
		if (prev && i915_request_wait(prev, 0, HZ) < 0)
			err = -ETIME;
		i915_request_put(prev);
3088 | prev = rq; |
3089 | if (err) |
3090 | break; |
3091 | |
3092 | count++; |
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(prev);
3095 | |
3096 | if (busy) { |
3097 | ktime_t now; |
3098 | |
3099 | p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now), |
3100 | p->busy); |
3101 | p->time = ktime_sub(now, p->time); |
3102 | } else { |
3103 | p->time = ktime_sub(ktime_get(), p->time); |
3104 | } |
3105 | |
3106 | err = switch_to_kernel_sync(ce, err); |
3107 | p->runtime = intel_context_get_total_runtime_ns(ce); |
3108 | p->count = count; |
3109 | |
3110 | intel_context_unpin(ce); |
3111 | intel_context_put(ce); |
3112 | thread->result = err; |
3113 | } |
3114 | |
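/*
 * Per-engine worker mirroring s_many: submit without waiting and count
 * how many requests can be queued within the timeout.
 */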
3115 | static void p_many(struct kthread_work *work) |
3116 | { |
3117 | struct p_thread *thread = container_of(work, typeof(*thread), work); |
3118 | struct perf_stats *p = &thread->p; |
3119 | struct intel_engine_cs *engine = p->engine; |
3120 | struct intel_context *ce; |
3121 | IGT_TIMEOUT(end_time); |
3122 | unsigned long count; |
3123 | int err = 0; |
3124 | bool busy; |
3125 | |
3126 | ce = intel_context_create(engine); |
	if (IS_ERR(ce)) {
		thread->result = PTR_ERR(ce);
3129 | return; |
3130 | } |
3131 | |
3132 | err = intel_context_pin(ce); |
3133 | if (err) { |
3134 | intel_context_put(ce); |
3135 | thread->result = err; |
3136 | return; |
3137 | } |
3138 | |
3139 | if (intel_engine_supports_stats(engine)) { |
		p->busy = intel_engine_get_busy_time(engine, &p->time);
3141 | busy = true; |
3142 | } else { |
3143 | p->time = ktime_get(); |
3144 | busy = false; |
3145 | } |
3146 | |
3147 | count = 0; |
3148 | do { |
3149 | struct i915_request *rq; |
3150 | |
3151 | rq = i915_request_create(ce); |
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
3154 | break; |
3155 | } |
3156 | |
3157 | i915_request_add(rq); |
3158 | count++; |
	} while (!__igt_timeout(end_time, NULL));
3160 | |
3161 | if (busy) { |
3162 | ktime_t now; |
3163 | |
3164 | p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now), |
3165 | p->busy); |
3166 | p->time = ktime_sub(now, p->time); |
3167 | } else { |
3168 | p->time = ktime_sub(ktime_get(), p->time); |
3169 | } |
3170 | |
3171 | err = switch_to_kernel_sync(ce, err); |
3172 | p->runtime = intel_context_get_total_runtime_ns(ce); |
3173 | p->count = count; |
3174 | |
3175 | intel_context_unpin(ce); |
3176 | intel_context_put(ce); |
3177 | thread->result = err; |
3178 | } |
3179 | |
3180 | static int perf_parallel_engines(void *arg) |
3181 | { |
3182 | struct drm_i915_private *i915 = arg; |
3183 | static void (* const func[])(struct kthread_work *) = { |
3184 | p_sync0, |
3185 | p_sync1, |
3186 | p_many, |
3187 | NULL, |
3188 | }; |
3189 | const unsigned int nengines = num_uabi_engines(i915); |
3190 | void (* const *fn)(struct kthread_work *); |
3191 | struct intel_engine_cs *engine; |
3192 | struct pm_qos_request qos; |
3193 | struct p_thread *engines; |
3194 | int err = 0; |
3195 | |
	engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
3197 | if (!engines) |
3198 | return -ENOMEM; |
3199 | |
	cpu_latency_qos_add_request(&qos, 0);
3201 | |
3202 | for (fn = func; *fn; fn++) { |
3203 | char name[KSYM_NAME_LEN]; |
3204 | struct igt_live_test t; |
3205 | unsigned int idx; |
3206 | |
		snprintf(name, sizeof(name), "%ps", *fn);
		err = igt_live_test_begin(&t, i915, __func__, name);
3209 | if (err) |
3210 | break; |
3211 | |
		atomic_set(&i915->selftest.counter, nengines);
3213 | |
3214 | idx = 0; |
3215 | for_each_uabi_engine(engine, i915) { |
3216 | struct kthread_worker *worker; |
3217 | |
3218 | intel_engine_pm_get(engine); |
3219 | |
3220 | memset(&engines[idx].p, 0, sizeof(engines[idx].p)); |
3221 | |
			worker = kthread_create_worker(0, "igt:%s",
						       engine->name);
			if (IS_ERR(worker)) {
				err = PTR_ERR(worker);
3226 | intel_engine_pm_put(engine); |
3227 | break; |
3228 | } |
3229 | engines[idx].worker = worker; |
3230 | engines[idx].result = 0; |
3231 | engines[idx].p.engine = engine; |
3232 | engines[idx].engine = engine; |
3233 | |
3234 | kthread_init_work(&engines[idx].work, *fn); |
			kthread_queue_work(worker, &engines[idx].work);
3236 | idx++; |
3237 | } |
3238 | |
3239 | idx = 0; |
3240 | for_each_uabi_engine(engine, i915) { |
3241 | int status; |
3242 | |
3243 | if (!engines[idx].worker) |
3244 | break; |
3245 | |
			kthread_flush_work(&engines[idx].work);
3247 | status = READ_ONCE(engines[idx].result); |
3248 | if (status && !err) |
3249 | err = status; |
3250 | |
3251 | intel_engine_pm_put(engine); |
3252 | |
			kthread_destroy_worker(engines[idx].worker);
3254 | idx++; |
3255 | } |
3256 | |
		if (igt_live_test_end(&t))
3258 | err = -EIO; |
3259 | if (err) |
3260 | break; |
3261 | |
3262 | idx = 0; |
3263 | for_each_uabi_engine(engine, i915) { |
3264 | struct perf_stats *p = &engines[idx].p; |
			u64 busy = 100 * ktime_to_ns(p->busy);
			u64 dt = ktime_to_ns(p->time);
3267 | int integer, decimal; |
3268 | |
3269 | if (dt) { |
				integer = div64_u64(busy, dt);
				busy -= integer * dt;
				decimal = div64_u64(100 * busy, dt);
3273 | } else { |
3274 | integer = 0; |
3275 | decimal = 0; |
3276 | } |
3277 | |
3278 | GEM_BUG_ON(engine != p->engine); |
3279 | pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n" , |
3280 | name, engine->name, p->count, integer, decimal, |
3281 | div_u64(p->runtime, 1000 * 1000), |
3282 | div_u64(ktime_to_ns(p->time), 1000 * 1000)); |
3283 | idx++; |
3284 | } |
3285 | } |
3286 | |
	cpu_latency_qos_remove_request(&qos);
	kfree(engines);
3289 | return err; |
3290 | } |
3291 | |
3292 | int i915_request_perf_selftests(struct drm_i915_private *i915) |
3293 | { |
3294 | static const struct i915_subtest tests[] = { |
3295 | SUBTEST(perf_request_latency), |
3296 | SUBTEST(perf_series_engines), |
3297 | SUBTEST(perf_parallel_engines), |
3298 | }; |
3299 | |
	if (intel_gt_is_wedged(to_gt(i915)))
3301 | return 0; |
3302 | |
3303 | return i915_subtests(tests, i915); |
3304 | } |
3305 | |