1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#include <linux/prime_numbers.h>
26#include <linux/pm_qos.h>
27#include <linux/sort.h>
28
29#include "gem/i915_gem_internal.h"
30#include "gem/i915_gem_pm.h"
31#include "gem/selftests/mock_context.h"
32
33#include "gt/intel_engine_heartbeat.h"
34#include "gt/intel_engine_pm.h"
35#include "gt/intel_engine_user.h"
36#include "gt/intel_gt.h"
37#include "gt/intel_gt_clock_utils.h"
38#include "gt/intel_gt_requests.h"
39#include "gt/selftest_engine_heartbeat.h"
40
41#include "i915_random.h"
42#include "i915_selftest.h"
43#include "igt_flush_test.h"
44#include "igt_live_test.h"
45#include "igt_spinner.h"
46#include "lib_sw_fence.h"
47
48#include "mock_drm.h"
49#include "mock_gem_device.h"
50
51static unsigned int num_uabi_engines(struct drm_i915_private *i915)
52{
53 struct intel_engine_cs *engine;
54 unsigned int count;
55
56 count = 0;
57 for_each_uabi_engine(engine, i915)
58 count++;
59
60 return count;
61}
62
63static struct intel_engine_cs *rcs0(struct drm_i915_private *i915)
64{
65 return intel_engine_lookup_user(i915, class: I915_ENGINE_CLASS_RENDER, instance: 0);
66}
67
68static int igt_add_request(void *arg)
69{
70 struct drm_i915_private *i915 = arg;
71 struct i915_request *request;
72
73 /* Basic preliminary test to create a request and let it loose! */
74
75 request = mock_request(ce: rcs0(i915)->kernel_context, HZ / 10);
76 if (!request)
77 return -ENOMEM;
78
79 i915_request_add(rq: request);
80
81 return 0;
82}
83
84static int igt_wait_request(void *arg)
85{
86 const long T = HZ / 4;
87 struct drm_i915_private *i915 = arg;
88 struct i915_request *request;
89 int err = -EINVAL;
90
91 /* Submit a request, then wait upon it */
92
93 request = mock_request(ce: rcs0(i915)->kernel_context, delay: T);
94 if (!request)
95 return -ENOMEM;
96
97 i915_request_get(rq: request);
98
99 if (i915_request_wait(rq: request, flags: 0, timeout: 0) != -ETIME) {
100 pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
101 goto out_request;
102 }
103
104 if (i915_request_wait(rq: request, flags: 0, timeout: T) != -ETIME) {
105 pr_err("request wait succeeded (expected timeout before submit!)\n");
106 goto out_request;
107 }
108
109 if (i915_request_completed(rq: request)) {
110 pr_err("request completed before submit!!\n");
111 goto out_request;
112 }
113
114 i915_request_add(rq: request);
115
116 if (i915_request_wait(rq: request, flags: 0, timeout: 0) != -ETIME) {
117 pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
118 goto out_request;
119 }
120
121 if (i915_request_completed(rq: request)) {
122 pr_err("request completed immediately!\n");
123 goto out_request;
124 }
125
126 if (i915_request_wait(rq: request, flags: 0, timeout: T / 2) != -ETIME) {
127 pr_err("request wait succeeded (expected timeout!)\n");
128 goto out_request;
129 }
130
131 if (i915_request_wait(rq: request, flags: 0, timeout: T) == -ETIME) {
132 pr_err("request wait timed out!\n");
133 goto out_request;
134 }
135
136 if (!i915_request_completed(rq: request)) {
137 pr_err("request not complete after waiting!\n");
138 goto out_request;
139 }
140
141 if (i915_request_wait(rq: request, flags: 0, timeout: T) == -ETIME) {
142 pr_err("request wait timed out when already complete!\n");
143 goto out_request;
144 }
145
146 err = 0;
147out_request:
148 i915_request_put(rq: request);
149 mock_device_flush(i915);
150 return err;
151}
152
153static int igt_fence_wait(void *arg)
154{
155 const long T = HZ / 4;
156 struct drm_i915_private *i915 = arg;
157 struct i915_request *request;
158 int err = -EINVAL;
159
160 /* Submit a request, treat it as a fence and wait upon it */
161
162 request = mock_request(ce: rcs0(i915)->kernel_context, delay: T);
163 if (!request)
164 return -ENOMEM;
165
166 if (dma_fence_wait_timeout(&request->fence, intr: false, timeout: T) != -ETIME) {
167 pr_err("fence wait success before submit (expected timeout)!\n");
168 goto out;
169 }
170
171 i915_request_add(rq: request);
172
173 if (dma_fence_is_signaled(fence: &request->fence)) {
174 pr_err("fence signaled immediately!\n");
175 goto out;
176 }
177
178 if (dma_fence_wait_timeout(&request->fence, intr: false, timeout: T / 2) != -ETIME) {
179 pr_err("fence wait success after submit (expected timeout)!\n");
180 goto out;
181 }
182
183 if (dma_fence_wait_timeout(&request->fence, intr: false, timeout: T) <= 0) {
184 pr_err("fence wait timed out (expected success)!\n");
185 goto out;
186 }
187
188 if (!dma_fence_is_signaled(fence: &request->fence)) {
189 pr_err("fence unsignaled after waiting!\n");
190 goto out;
191 }
192
193 if (dma_fence_wait_timeout(&request->fence, intr: false, timeout: T) <= 0) {
194 pr_err("fence wait timed out when complete (expected success)!\n");
195 goto out;
196 }
197
198 err = 0;
199out:
200 mock_device_flush(i915);
201 return err;
202}
203
204static int igt_request_rewind(void *arg)
205{
206 struct drm_i915_private *i915 = arg;
207 struct i915_request *request, *vip;
208 struct i915_gem_context *ctx[2];
209 struct intel_context *ce;
210 int err = -EINVAL;
211
212 ctx[0] = mock_context(i915, name: "A");
213 if (!ctx[0]) {
214 err = -ENOMEM;
215 goto err_ctx_0;
216 }
217
218 ce = i915_gem_context_get_engine(ctx: ctx[0], idx: RCS0);
219 GEM_BUG_ON(IS_ERR(ce));
220 request = mock_request(ce, delay: 2 * HZ);
221 intel_context_put(ce);
222 if (!request) {
223 err = -ENOMEM;
224 goto err_context_0;
225 }
226
227 i915_request_get(rq: request);
228 i915_request_add(rq: request);
229
230 ctx[1] = mock_context(i915, name: "B");
231 if (!ctx[1]) {
232 err = -ENOMEM;
233 goto err_ctx_1;
234 }
235
236 ce = i915_gem_context_get_engine(ctx: ctx[1], idx: RCS0);
237 GEM_BUG_ON(IS_ERR(ce));
238 vip = mock_request(ce, delay: 0);
239 intel_context_put(ce);
240 if (!vip) {
241 err = -ENOMEM;
242 goto err_context_1;
243 }
244
245 /* Simulate preemption by manual reordering */
246 if (!mock_cancel_request(request)) {
247 pr_err("failed to cancel request (already executed)!\n");
248 i915_request_add(rq: vip);
249 goto err_context_1;
250 }
251 i915_request_get(rq: vip);
252 i915_request_add(rq: vip);
253 rcu_read_lock();
254 request->engine->submit_request(request);
255 rcu_read_unlock();
256
257
258 if (i915_request_wait(rq: vip, flags: 0, HZ) == -ETIME) {
259 pr_err("timed out waiting for high priority request\n");
260 goto err;
261 }
262
263 if (i915_request_completed(rq: request)) {
264 pr_err("low priority request already completed\n");
265 goto err;
266 }
267
268 err = 0;
269err:
270 i915_request_put(rq: vip);
271err_context_1:
272 mock_context_close(ctx: ctx[1]);
273err_ctx_1:
274 i915_request_put(rq: request);
275err_context_0:
276 mock_context_close(ctx: ctx[0]);
277err_ctx_0:
278 mock_device_flush(i915);
279 return err;
280}
281
282struct smoketest {
283 struct intel_engine_cs *engine;
284 struct i915_gem_context **contexts;
285 atomic_long_t num_waits, num_fences;
286 int ncontexts, max_batch;
287 struct i915_request *(*request_alloc)(struct intel_context *ce);
288};
289
290static struct i915_request *
291__mock_request_alloc(struct intel_context *ce)
292{
293 return mock_request(ce, delay: 0);
294}
295
296static struct i915_request *
297__live_request_alloc(struct intel_context *ce)
298{
299 return intel_context_create_request(ce);
300}
301
302struct smoke_thread {
303 struct kthread_worker *worker;
304 struct kthread_work work;
305 struct smoketest *t;
306 bool stop;
307 int result;
308};
309
310static void __igt_breadcrumbs_smoketest(struct kthread_work *work)
311{
312 struct smoke_thread *thread = container_of(work, typeof(*thread), work);
313 struct smoketest *t = thread->t;
314 const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
315 const unsigned int total = 4 * t->ncontexts + 1;
316 unsigned int num_waits = 0, num_fences = 0;
317 struct i915_request **requests;
318 I915_RND_STATE(prng);
319 unsigned int *order;
320 int err = 0;
321
322 /*
323 * A very simple test to catch the most egregious of list handling bugs.
324 *
325 * At its heart, we simply create oodles of requests running across
326 * multiple kthreads and enable signaling on them, for the sole purpose
327 * of stressing our breadcrumb handling. The only inspection we do is
328 * that the fences were marked as signaled.
329 */
330
331 requests = kcalloc(n: total, size: sizeof(*requests), GFP_KERNEL);
332 if (!requests) {
333 thread->result = -ENOMEM;
334 return;
335 }
336
337 order = i915_random_order(count: total, state: &prng);
338 if (!order) {
339 err = -ENOMEM;
340 goto out_requests;
341 }
342
343 while (!READ_ONCE(thread->stop)) {
344 struct i915_sw_fence *submit, *wait;
345 unsigned int n, count;
346
347 submit = heap_fence_create(GFP_KERNEL);
348 if (!submit) {
349 err = -ENOMEM;
350 break;
351 }
352
353 wait = heap_fence_create(GFP_KERNEL);
354 if (!wait) {
355 i915_sw_fence_commit(fence: submit);
356 heap_fence_put(fence: submit);
357 err = -ENOMEM;
358 break;
359 }
360
361 i915_random_reorder(order, count: total, state: &prng);
362 count = 1 + i915_prandom_u32_max_state(ep_ro: max_batch, state: &prng);
363
364 for (n = 0; n < count; n++) {
365 struct i915_gem_context *ctx =
366 t->contexts[order[n] % t->ncontexts];
367 struct i915_request *rq;
368 struct intel_context *ce;
369
370 ce = i915_gem_context_get_engine(ctx, idx: t->engine->legacy_idx);
371 GEM_BUG_ON(IS_ERR(ce));
372 rq = t->request_alloc(ce);
373 intel_context_put(ce);
374 if (IS_ERR(ptr: rq)) {
375 err = PTR_ERR(ptr: rq);
376 count = n;
377 break;
378 }
379
380 err = i915_sw_fence_await_sw_fence_gfp(fence: &rq->submit,
381 after: submit,
382 GFP_KERNEL);
383
384 requests[n] = i915_request_get(rq);
385 i915_request_add(rq);
386
387 if (err >= 0)
388 err = i915_sw_fence_await_dma_fence(fence: wait,
389 dma: &rq->fence,
390 timeout: 0,
391 GFP_KERNEL);
392
393 if (err < 0) {
394 i915_request_put(rq);
395 count = n;
396 break;
397 }
398 }
399
400 i915_sw_fence_commit(fence: submit);
401 i915_sw_fence_commit(fence: wait);
402
403 if (!wait_event_timeout(wait->wait,
404 i915_sw_fence_done(wait),
405 5 * HZ)) {
406 struct i915_request *rq = requests[count - 1];
407
408 pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
409 atomic_read(&wait->pending), count,
410 rq->fence.context, rq->fence.seqno,
411 t->engine->name);
412 GEM_TRACE_DUMP();
413
414 intel_gt_set_wedged(gt: t->engine->gt);
415 GEM_BUG_ON(!i915_request_completed(rq));
416 i915_sw_fence_wait(fence: wait);
417 err = -EIO;
418 }
419
420 for (n = 0; n < count; n++) {
421 struct i915_request *rq = requests[n];
422
423 if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
424 &rq->fence.flags)) {
425 pr_err("%llu:%llu was not signaled!\n",
426 rq->fence.context, rq->fence.seqno);
427 err = -EINVAL;
428 }
429
430 i915_request_put(rq);
431 }
432
433 heap_fence_put(fence: wait);
434 heap_fence_put(fence: submit);
435
436 if (err < 0)
437 break;
438
439 num_fences += count;
440 num_waits++;
441
442 cond_resched();
443 }
444
445 atomic_long_add(i: num_fences, v: &t->num_fences);
446 atomic_long_add(i: num_waits, v: &t->num_waits);
447
448 kfree(objp: order);
449out_requests:
450 kfree(objp: requests);
451 thread->result = err;
452}
453
454static int mock_breadcrumbs_smoketest(void *arg)
455{
456 struct drm_i915_private *i915 = arg;
457 struct smoketest t = {
458 .engine = rcs0(i915),
459 .ncontexts = 1024,
460 .max_batch = 1024,
461 .request_alloc = __mock_request_alloc
462 };
463 unsigned int ncpus = num_online_cpus();
464 struct smoke_thread *threads;
465 unsigned int n;
466 int ret = 0;
467
468 /*
469 * Smoketest our breadcrumb/signal handling for requests across multiple
470 * threads. A very simple test to only catch the most egregious of bugs.
471 * See __igt_breadcrumbs_smoketest();
472 */
473
474 threads = kcalloc(n: ncpus, size: sizeof(*threads), GFP_KERNEL);
475 if (!threads)
476 return -ENOMEM;
477
478 t.contexts = kcalloc(n: t.ncontexts, size: sizeof(*t.contexts), GFP_KERNEL);
479 if (!t.contexts) {
480 ret = -ENOMEM;
481 goto out_threads;
482 }
483
484 for (n = 0; n < t.ncontexts; n++) {
485 t.contexts[n] = mock_context(i915: t.engine->i915, name: "mock");
486 if (!t.contexts[n]) {
487 ret = -ENOMEM;
488 goto out_contexts;
489 }
490 }
491
492 for (n = 0; n < ncpus; n++) {
493 struct kthread_worker *worker;
494
495 worker = kthread_create_worker(flags: 0, namefmt: "igt/%d", n);
496 if (IS_ERR(ptr: worker)) {
497 ret = PTR_ERR(ptr: worker);
498 ncpus = n;
499 break;
500 }
501
502 threads[n].worker = worker;
503 threads[n].t = &t;
504 threads[n].stop = false;
505 threads[n].result = 0;
506
507 kthread_init_work(&threads[n].work,
508 __igt_breadcrumbs_smoketest);
509 kthread_queue_work(worker, work: &threads[n].work);
510 }
511
512 msleep(msecs: jiffies_to_msecs(j: i915_selftest.timeout_jiffies));
513
514 for (n = 0; n < ncpus; n++) {
515 int err;
516
517 WRITE_ONCE(threads[n].stop, true);
518 kthread_flush_work(work: &threads[n].work);
519 err = READ_ONCE(threads[n].result);
520 if (err < 0 && !ret)
521 ret = err;
522
523 kthread_destroy_worker(worker: threads[n].worker);
524 }
525 pr_info("Completed %lu waits for %lu fence across %d cpus\n",
526 atomic_long_read(&t.num_waits),
527 atomic_long_read(&t.num_fences),
528 ncpus);
529
530out_contexts:
531 for (n = 0; n < t.ncontexts; n++) {
532 if (!t.contexts[n])
533 break;
534 mock_context_close(ctx: t.contexts[n]);
535 }
536 kfree(objp: t.contexts);
537out_threads:
538 kfree(objp: threads);
539 return ret;
540}
541
542int i915_request_mock_selftests(void)
543{
544 static const struct i915_subtest tests[] = {
545 SUBTEST(igt_add_request),
546 SUBTEST(igt_wait_request),
547 SUBTEST(igt_fence_wait),
548 SUBTEST(igt_request_rewind),
549 SUBTEST(mock_breadcrumbs_smoketest),
550 };
551 struct drm_i915_private *i915;
552 intel_wakeref_t wakeref;
553 int err = 0;
554
555 i915 = mock_gem_device();
556 if (!i915)
557 return -ENOMEM;
558
559 with_intel_runtime_pm(&i915->runtime_pm, wakeref)
560 err = i915_subtests(tests, i915);
561
562 mock_destroy_device(i915);
563
564 return err;
565}
566
567static int live_nop_request(void *arg)
568{
569 struct drm_i915_private *i915 = arg;
570 struct intel_engine_cs *engine;
571 struct igt_live_test t;
572 int err = -ENODEV;
573
574 /*
575 * Submit various sized batches of empty requests, to each engine
576 * (individually), and wait for the batch to complete. We can check
577 * the overhead of submitting requests to the hardware.
578 */
579
580 for_each_uabi_engine(engine, i915) {
581 unsigned long n, prime;
582 IGT_TIMEOUT(end_time);
583 ktime_t times[2] = {};
584
585 err = igt_live_test_begin(t: &t, i915, func: __func__, name: engine->name);
586 if (err)
587 return err;
588
589 intel_engine_pm_get(engine);
590 for_each_prime_number_from(prime, 1, 8192) {
591 struct i915_request *request = NULL;
592
593 times[1] = ktime_get_raw();
594
595 for (n = 0; n < prime; n++) {
596 i915_request_put(rq: request);
597 request = i915_request_create(ce: engine->kernel_context);
598 if (IS_ERR(ptr: request))
599 return PTR_ERR(ptr: request);
600
601 /*
602 * This space is left intentionally blank.
603 *
604 * We do not actually want to perform any
605 * action with this request, we just want
606 * to measure the latency in allocation
607 * and submission of our breadcrumbs -
608 * ensuring that the bare request is sufficient
609 * for the system to work (i.e. proper HEAD
610 * tracking of the rings, interrupt handling,
611 * etc). It also gives us the lowest bounds
612 * for latency.
613 */
614
615 i915_request_get(rq: request);
616 i915_request_add(rq: request);
617 }
618 i915_request_wait(rq: request, flags: 0, MAX_SCHEDULE_TIMEOUT);
619 i915_request_put(rq: request);
620
621 times[1] = ktime_sub(ktime_get_raw(), times[1]);
622 if (prime == 1)
623 times[0] = times[1];
624
625 if (__igt_timeout(timeout: end_time, NULL))
626 break;
627 }
628 intel_engine_pm_put(engine);
629
630 err = igt_live_test_end(t: &t);
631 if (err)
632 return err;
633
634 pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
635 engine->name,
636 ktime_to_ns(times[0]),
637 prime, div64_u64(ktime_to_ns(times[1]), prime));
638 }
639
640 return err;
641}
642
643static int __cancel_inactive(struct intel_engine_cs *engine)
644{
645 struct intel_context *ce;
646 struct igt_spinner spin;
647 struct i915_request *rq;
648 int err = 0;
649
650 if (igt_spinner_init(spin: &spin, gt: engine->gt))
651 return -ENOMEM;
652
653 ce = intel_context_create(engine);
654 if (IS_ERR(ptr: ce)) {
655 err = PTR_ERR(ptr: ce);
656 goto out_spin;
657 }
658
659 rq = igt_spinner_create_request(spin: &spin, ce, MI_ARB_CHECK);
660 if (IS_ERR(ptr: rq)) {
661 err = PTR_ERR(ptr: rq);
662 goto out_ce;
663 }
664
665 pr_debug("%s: Cancelling inactive request\n", engine->name);
666 i915_request_cancel(rq, error: -EINTR);
667 i915_request_get(rq);
668 i915_request_add(rq);
669
670 if (i915_request_wait(rq, flags: 0, HZ / 5) < 0) {
671 struct drm_printer p = drm_info_printer(dev: engine->i915->drm.dev);
672
673 pr_err("%s: Failed to cancel inactive request\n", engine->name);
674 intel_engine_dump(engine, m: &p, header: "%s\n", engine->name);
675 err = -ETIME;
676 goto out_rq;
677 }
678
679 if (rq->fence.error != -EINTR) {
680 pr_err("%s: fence not cancelled (%u)\n",
681 engine->name, rq->fence.error);
682 err = -EINVAL;
683 }
684
685out_rq:
686 i915_request_put(rq);
687out_ce:
688 intel_context_put(ce);
689out_spin:
690 igt_spinner_fini(spin: &spin);
691 if (err)
692 pr_err("%s: %s error %d\n", __func__, engine->name, err);
693 return err;
694}
695
696static int __cancel_active(struct intel_engine_cs *engine)
697{
698 struct intel_context *ce;
699 struct igt_spinner spin;
700 struct i915_request *rq;
701 int err = 0;
702
703 if (igt_spinner_init(spin: &spin, gt: engine->gt))
704 return -ENOMEM;
705
706 ce = intel_context_create(engine);
707 if (IS_ERR(ptr: ce)) {
708 err = PTR_ERR(ptr: ce);
709 goto out_spin;
710 }
711
712 rq = igt_spinner_create_request(spin: &spin, ce, MI_ARB_CHECK);
713 if (IS_ERR(ptr: rq)) {
714 err = PTR_ERR(ptr: rq);
715 goto out_ce;
716 }
717
718 pr_debug("%s: Cancelling active request\n", engine->name);
719 i915_request_get(rq);
720 i915_request_add(rq);
721 if (!igt_wait_for_spinner(spin: &spin, rq)) {
722 struct drm_printer p = drm_info_printer(dev: engine->i915->drm.dev);
723
724 pr_err("Failed to start spinner on %s\n", engine->name);
725 intel_engine_dump(engine, m: &p, header: "%s\n", engine->name);
726 err = -ETIME;
727 goto out_rq;
728 }
729 i915_request_cancel(rq, error: -EINTR);
730
731 if (i915_request_wait(rq, flags: 0, HZ / 5) < 0) {
732 struct drm_printer p = drm_info_printer(dev: engine->i915->drm.dev);
733
734 pr_err("%s: Failed to cancel active request\n", engine->name);
735 intel_engine_dump(engine, m: &p, header: "%s\n", engine->name);
736 err = -ETIME;
737 goto out_rq;
738 }
739
740 if (rq->fence.error != -EINTR) {
741 pr_err("%s: fence not cancelled (%u)\n",
742 engine->name, rq->fence.error);
743 err = -EINVAL;
744 }
745
746out_rq:
747 i915_request_put(rq);
748out_ce:
749 intel_context_put(ce);
750out_spin:
751 igt_spinner_fini(spin: &spin);
752 if (err)
753 pr_err("%s: %s error %d\n", __func__, engine->name, err);
754 return err;
755}
756
757static int __cancel_completed(struct intel_engine_cs *engine)
758{
759 struct intel_context *ce;
760 struct igt_spinner spin;
761 struct i915_request *rq;
762 int err = 0;
763
764 if (igt_spinner_init(spin: &spin, gt: engine->gt))
765 return -ENOMEM;
766
767 ce = intel_context_create(engine);
768 if (IS_ERR(ptr: ce)) {
769 err = PTR_ERR(ptr: ce);
770 goto out_spin;
771 }
772
773 rq = igt_spinner_create_request(spin: &spin, ce, MI_ARB_CHECK);
774 if (IS_ERR(ptr: rq)) {
775 err = PTR_ERR(ptr: rq);
776 goto out_ce;
777 }
778 igt_spinner_end(spin: &spin);
779 i915_request_get(rq);
780 i915_request_add(rq);
781
782 if (i915_request_wait(rq, flags: 0, HZ / 5) < 0) {
783 err = -ETIME;
784 goto out_rq;
785 }
786
787 pr_debug("%s: Cancelling completed request\n", engine->name);
788 i915_request_cancel(rq, error: -EINTR);
789 if (rq->fence.error) {
790 pr_err("%s: fence not cancelled (%u)\n",
791 engine->name, rq->fence.error);
792 err = -EINVAL;
793 }
794
795out_rq:
796 i915_request_put(rq);
797out_ce:
798 intel_context_put(ce);
799out_spin:
800 igt_spinner_fini(spin: &spin);
801 if (err)
802 pr_err("%s: %s error %d\n", __func__, engine->name, err);
803 return err;
804}
805
806/*
807 * Test to prove a non-preemptable request can be cancelled and a subsequent
808 * request on the same context can successfully complete after cancellation.
809 *
810 * Testing methodology is to create a non-preemptible request and submit it,
811 * wait for spinner to start, create a NOP request and submit it, cancel the
812 * spinner, wait for spinner to complete and verify it failed with an error,
813 * finally wait for NOP request to complete verify it succeeded without an
814 * error. Preemption timeout also reduced / restored so test runs in a timely
815 * maner.
816 */
817static int __cancel_reset(struct drm_i915_private *i915,
818 struct intel_engine_cs *engine)
819{
820 struct intel_context *ce;
821 struct igt_spinner spin;
822 struct i915_request *rq, *nop;
823 unsigned long preempt_timeout_ms;
824 int err = 0;
825
826 if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT ||
827 !intel_has_reset_engine(gt: engine->gt))
828 return 0;
829
830 preempt_timeout_ms = engine->props.preempt_timeout_ms;
831 engine->props.preempt_timeout_ms = 100;
832
833 if (igt_spinner_init(spin: &spin, gt: engine->gt))
834 goto out_restore;
835
836 ce = intel_context_create(engine);
837 if (IS_ERR(ptr: ce)) {
838 err = PTR_ERR(ptr: ce);
839 goto out_spin;
840 }
841
842 rq = igt_spinner_create_request(spin: &spin, ce, MI_NOOP);
843 if (IS_ERR(ptr: rq)) {
844 err = PTR_ERR(ptr: rq);
845 goto out_ce;
846 }
847
848 pr_debug("%s: Cancelling active non-preemptable request\n",
849 engine->name);
850 i915_request_get(rq);
851 i915_request_add(rq);
852 if (!igt_wait_for_spinner(spin: &spin, rq)) {
853 struct drm_printer p = drm_info_printer(dev: engine->i915->drm.dev);
854
855 pr_err("Failed to start spinner on %s\n", engine->name);
856 intel_engine_dump(engine, m: &p, header: "%s\n", engine->name);
857 err = -ETIME;
858 goto out_rq;
859 }
860
861 nop = intel_context_create_request(ce);
862 if (IS_ERR(ptr: nop))
863 goto out_rq;
864 i915_request_get(rq: nop);
865 i915_request_add(rq: nop);
866
867 i915_request_cancel(rq, error: -EINTR);
868
869 if (i915_request_wait(rq, flags: 0, HZ) < 0) {
870 struct drm_printer p = drm_info_printer(dev: engine->i915->drm.dev);
871
872 pr_err("%s: Failed to cancel hung request\n", engine->name);
873 intel_engine_dump(engine, m: &p, header: "%s\n", engine->name);
874 err = -ETIME;
875 goto out_nop;
876 }
877
878 if (rq->fence.error != -EINTR) {
879 pr_err("%s: fence not cancelled (%u)\n",
880 engine->name, rq->fence.error);
881 err = -EINVAL;
882 goto out_nop;
883 }
884
885 if (i915_request_wait(rq: nop, flags: 0, HZ) < 0) {
886 struct drm_printer p = drm_info_printer(dev: engine->i915->drm.dev);
887
888 pr_err("%s: Failed to complete nop request\n", engine->name);
889 intel_engine_dump(engine, m: &p, header: "%s\n", engine->name);
890 err = -ETIME;
891 goto out_nop;
892 }
893
894 if (nop->fence.error != 0) {
895 pr_err("%s: Nop request errored (%u)\n",
896 engine->name, nop->fence.error);
897 err = -EINVAL;
898 }
899
900out_nop:
901 i915_request_put(rq: nop);
902out_rq:
903 i915_request_put(rq);
904out_ce:
905 intel_context_put(ce);
906out_spin:
907 igt_spinner_fini(spin: &spin);
908out_restore:
909 engine->props.preempt_timeout_ms = preempt_timeout_ms;
910 if (err)
911 pr_err("%s: %s error %d\n", __func__, engine->name, err);
912 return err;
913}
914
915static int live_cancel_request(void *arg)
916{
917 struct drm_i915_private *i915 = arg;
918 struct intel_engine_cs *engine;
919
920 /*
921 * Check cancellation of requests. We expect to be able to immediately
922 * cancel active requests, even if they are currently on the GPU.
923 */
924
925 for_each_uabi_engine(engine, i915) {
926 struct igt_live_test t;
927 int err, err2;
928
929 if (!intel_engine_has_preemption(engine))
930 continue;
931
932 err = igt_live_test_begin(t: &t, i915, func: __func__, name: engine->name);
933 if (err)
934 return err;
935
936 err = __cancel_inactive(engine);
937 if (err == 0)
938 err = __cancel_active(engine);
939 if (err == 0)
940 err = __cancel_completed(engine);
941
942 err2 = igt_live_test_end(t: &t);
943 if (err)
944 return err;
945 if (err2)
946 return err2;
947
948 /* Expects reset so call outside of igt_live_test_* */
949 err = __cancel_reset(i915, engine);
950 if (err)
951 return err;
952
953 if (igt_flush_test(i915))
954 return -EIO;
955 }
956
957 return 0;
958}
959
960static struct i915_vma *empty_batch(struct intel_gt *gt)
961{
962 struct drm_i915_gem_object *obj;
963 struct i915_vma *vma;
964 u32 *cmd;
965 int err;
966
967 obj = i915_gem_object_create_internal(i915: gt->i915, PAGE_SIZE);
968 if (IS_ERR(ptr: obj))
969 return ERR_CAST(ptr: obj);
970
971 cmd = i915_gem_object_pin_map_unlocked(obj, type: I915_MAP_WC);
972 if (IS_ERR(ptr: cmd)) {
973 err = PTR_ERR(ptr: cmd);
974 goto err;
975 }
976
977 *cmd = MI_BATCH_BUFFER_END;
978
979 __i915_gem_object_flush_map(obj, offset: 0, size: 64);
980 i915_gem_object_unpin_map(obj);
981
982 intel_gt_chipset_flush(gt);
983
984 vma = i915_vma_instance(obj, vm: gt->vm, NULL);
985 if (IS_ERR(ptr: vma)) {
986 err = PTR_ERR(ptr: vma);
987 goto err;
988 }
989
990 err = i915_vma_pin(vma, size: 0, alignment: 0, PIN_USER);
991 if (err)
992 goto err;
993
994 /* Force the wait now to avoid including it in the benchmark */
995 err = i915_vma_sync(vma);
996 if (err)
997 goto err_pin;
998
999 return vma;
1000
1001err_pin:
1002 i915_vma_unpin(vma);
1003err:
1004 i915_gem_object_put(obj);
1005 return ERR_PTR(error: err);
1006}
1007
1008static int emit_bb_start(struct i915_request *rq, struct i915_vma *batch)
1009{
1010 return rq->engine->emit_bb_start(rq,
1011 i915_vma_offset(vma: batch),
1012 i915_vma_size(vma: batch),
1013 0);
1014}
1015
1016static struct i915_request *
1017empty_request(struct intel_engine_cs *engine,
1018 struct i915_vma *batch)
1019{
1020 struct i915_request *request;
1021 int err;
1022
1023 request = i915_request_create(ce: engine->kernel_context);
1024 if (IS_ERR(ptr: request))
1025 return request;
1026
1027 err = emit_bb_start(rq: request, batch);
1028 if (err)
1029 goto out_request;
1030
1031 i915_request_get(rq: request);
1032out_request:
1033 i915_request_add(rq: request);
1034 return err ? ERR_PTR(error: err) : request;
1035}
1036
1037static int live_empty_request(void *arg)
1038{
1039 struct drm_i915_private *i915 = arg;
1040 struct intel_engine_cs *engine;
1041 struct igt_live_test t;
1042 int err;
1043
1044 /*
1045 * Submit various sized batches of empty requests, to each engine
1046 * (individually), and wait for the batch to complete. We can check
1047 * the overhead of submitting requests to the hardware.
1048 */
1049
1050 for_each_uabi_engine(engine, i915) {
1051 IGT_TIMEOUT(end_time);
1052 struct i915_request *request;
1053 struct i915_vma *batch;
1054 unsigned long n, prime;
1055 ktime_t times[2] = {};
1056
1057 batch = empty_batch(gt: engine->gt);
1058 if (IS_ERR(ptr: batch))
1059 return PTR_ERR(ptr: batch);
1060
1061 err = igt_live_test_begin(t: &t, i915, func: __func__, name: engine->name);
1062 if (err)
1063 goto out_batch;
1064
1065 intel_engine_pm_get(engine);
1066
1067 /* Warmup / preload */
1068 request = empty_request(engine, batch);
1069 if (IS_ERR(ptr: request)) {
1070 err = PTR_ERR(ptr: request);
1071 intel_engine_pm_put(engine);
1072 goto out_batch;
1073 }
1074 i915_request_wait(rq: request, flags: 0, MAX_SCHEDULE_TIMEOUT);
1075
1076 for_each_prime_number_from(prime, 1, 8192) {
1077 times[1] = ktime_get_raw();
1078
1079 for (n = 0; n < prime; n++) {
1080 i915_request_put(rq: request);
1081 request = empty_request(engine, batch);
1082 if (IS_ERR(ptr: request)) {
1083 err = PTR_ERR(ptr: request);
1084 intel_engine_pm_put(engine);
1085 goto out_batch;
1086 }
1087 }
1088 i915_request_wait(rq: request, flags: 0, MAX_SCHEDULE_TIMEOUT);
1089
1090 times[1] = ktime_sub(ktime_get_raw(), times[1]);
1091 if (prime == 1)
1092 times[0] = times[1];
1093
1094 if (__igt_timeout(timeout: end_time, NULL))
1095 break;
1096 }
1097 i915_request_put(rq: request);
1098 intel_engine_pm_put(engine);
1099
1100 err = igt_live_test_end(t: &t);
1101 if (err)
1102 goto out_batch;
1103
1104 pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
1105 engine->name,
1106 ktime_to_ns(times[0]),
1107 prime, div64_u64(ktime_to_ns(times[1]), prime));
1108out_batch:
1109 i915_vma_unpin(vma: batch);
1110 i915_vma_put(vma: batch);
1111 if (err)
1112 break;
1113 }
1114
1115 return err;
1116}
1117
1118static struct i915_vma *recursive_batch(struct intel_gt *gt)
1119{
1120 struct drm_i915_gem_object *obj;
1121 const int ver = GRAPHICS_VER(gt->i915);
1122 struct i915_vma *vma;
1123 u32 *cmd;
1124 int err;
1125
1126 obj = i915_gem_object_create_internal(i915: gt->i915, PAGE_SIZE);
1127 if (IS_ERR(ptr: obj))
1128 return ERR_CAST(ptr: obj);
1129
1130 vma = i915_vma_instance(obj, vm: gt->vm, NULL);
1131 if (IS_ERR(ptr: vma)) {
1132 err = PTR_ERR(ptr: vma);
1133 goto err;
1134 }
1135
1136 err = i915_vma_pin(vma, size: 0, alignment: 0, PIN_USER);
1137 if (err)
1138 goto err;
1139
1140 cmd = i915_gem_object_pin_map_unlocked(obj, type: I915_MAP_WC);
1141 if (IS_ERR(ptr: cmd)) {
1142 err = PTR_ERR(ptr: cmd);
1143 goto err;
1144 }
1145
1146 if (ver >= 8) {
1147 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
1148 *cmd++ = lower_32_bits(i915_vma_offset(vma));
1149 *cmd++ = upper_32_bits(i915_vma_offset(vma));
1150 } else if (ver >= 6) {
1151 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
1152 *cmd++ = lower_32_bits(i915_vma_offset(vma));
1153 } else {
1154 *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
1155 *cmd++ = lower_32_bits(i915_vma_offset(vma));
1156 }
1157 *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */
1158
1159 __i915_gem_object_flush_map(obj, offset: 0, size: 64);
1160 i915_gem_object_unpin_map(obj);
1161
1162 intel_gt_chipset_flush(gt);
1163
1164 return vma;
1165
1166err:
1167 i915_gem_object_put(obj);
1168 return ERR_PTR(error: err);
1169}
1170
1171static int recursive_batch_resolve(struct i915_vma *batch)
1172{
1173 u32 *cmd;
1174
1175 cmd = i915_gem_object_pin_map_unlocked(obj: batch->obj, type: I915_MAP_WC);
1176 if (IS_ERR(ptr: cmd))
1177 return PTR_ERR(ptr: cmd);
1178
1179 *cmd = MI_BATCH_BUFFER_END;
1180
1181 __i915_gem_object_flush_map(obj: batch->obj, offset: 0, size: sizeof(*cmd));
1182 i915_gem_object_unpin_map(obj: batch->obj);
1183
1184 intel_gt_chipset_flush(gt: batch->vm->gt);
1185
1186 return 0;
1187}
1188
1189static int live_all_engines(void *arg)
1190{
1191 struct drm_i915_private *i915 = arg;
1192 const unsigned int nengines = num_uabi_engines(i915);
1193 struct intel_engine_cs *engine;
1194 struct i915_request **request;
1195 struct igt_live_test t;
1196 unsigned int idx;
1197 int err;
1198
1199 /*
1200 * Check we can submit requests to all engines simultaneously. We
1201 * send a recursive batch to each engine - checking that we don't
1202 * block doing so, and that they don't complete too soon.
1203 */
1204
1205 request = kcalloc(n: nengines, size: sizeof(*request), GFP_KERNEL);
1206 if (!request)
1207 return -ENOMEM;
1208
1209 err = igt_live_test_begin(t: &t, i915, func: __func__, name: "");
1210 if (err)
1211 goto out_free;
1212
1213 idx = 0;
1214 for_each_uabi_engine(engine, i915) {
1215 struct i915_vma *batch;
1216
1217 batch = recursive_batch(gt: engine->gt);
1218 if (IS_ERR(ptr: batch)) {
1219 err = PTR_ERR(ptr: batch);
1220 pr_err("%s: Unable to create batch, err=%d\n",
1221 __func__, err);
1222 goto out_free;
1223 }
1224
1225 i915_vma_lock(vma: batch);
1226 request[idx] = intel_engine_create_kernel_request(engine);
1227 if (IS_ERR(ptr: request[idx])) {
1228 err = PTR_ERR(ptr: request[idx]);
1229 pr_err("%s: Request allocation failed with err=%d\n",
1230 __func__, err);
1231 goto out_unlock;
1232 }
1233 GEM_BUG_ON(request[idx]->context->vm != batch->vm);
1234
1235 err = i915_vma_move_to_active(vma: batch, rq: request[idx], flags: 0);
1236 GEM_BUG_ON(err);
1237
1238 err = emit_bb_start(rq: request[idx], batch);
1239 GEM_BUG_ON(err);
1240 request[idx]->batch = batch;
1241
1242 i915_request_get(rq: request[idx]);
1243 i915_request_add(rq: request[idx]);
1244 idx++;
1245out_unlock:
1246 i915_vma_unlock(vma: batch);
1247 if (err)
1248 goto out_request;
1249 }
1250
1251 idx = 0;
1252 for_each_uabi_engine(engine, i915) {
1253 if (i915_request_completed(rq: request[idx])) {
1254 pr_err("%s(%s): request completed too early!\n",
1255 __func__, engine->name);
1256 err = -EINVAL;
1257 goto out_request;
1258 }
1259 idx++;
1260 }
1261
1262 idx = 0;
1263 for_each_uabi_engine(engine, i915) {
1264 err = recursive_batch_resolve(batch: request[idx]->batch);
1265 if (err) {
1266 pr_err("%s: failed to resolve batch, err=%d\n",
1267 __func__, err);
1268 goto out_request;
1269 }
1270 idx++;
1271 }
1272
1273 idx = 0;
1274 for_each_uabi_engine(engine, i915) {
1275 struct i915_request *rq = request[idx];
1276 long timeout;
1277
1278 timeout = i915_request_wait(rq, flags: 0,
1279 MAX_SCHEDULE_TIMEOUT);
1280 if (timeout < 0) {
1281 err = timeout;
1282 pr_err("%s: error waiting for request on %s, err=%d\n",
1283 __func__, engine->name, err);
1284 goto out_request;
1285 }
1286
1287 GEM_BUG_ON(!i915_request_completed(rq));
1288 i915_vma_unpin(vma: rq->batch);
1289 i915_vma_put(vma: rq->batch);
1290 i915_request_put(rq);
1291 request[idx] = NULL;
1292 idx++;
1293 }
1294
1295 err = igt_live_test_end(t: &t);
1296
1297out_request:
1298 idx = 0;
1299 for_each_uabi_engine(engine, i915) {
1300 struct i915_request *rq = request[idx];
1301
1302 if (!rq)
1303 continue;
1304
1305 if (rq->batch) {
1306 i915_vma_unpin(vma: rq->batch);
1307 i915_vma_put(vma: rq->batch);
1308 }
1309 i915_request_put(rq);
1310 idx++;
1311 }
1312out_free:
1313 kfree(objp: request);
1314 return err;
1315}
1316
1317static int live_sequential_engines(void *arg)
1318{
1319 struct drm_i915_private *i915 = arg;
1320 const unsigned int nengines = num_uabi_engines(i915);
1321 struct i915_request **request;
1322 struct i915_request *prev = NULL;
1323 struct intel_engine_cs *engine;
1324 struct igt_live_test t;
1325 unsigned int idx;
1326 int err;
1327
1328 /*
1329 * Check we can submit requests to all engines sequentially, such
1330 * that each successive request waits for the earlier ones. This
1331 * tests that we don't execute requests out of order, even though
1332 * they are running on independent engines.
1333 */
1334
1335 request = kcalloc(n: nengines, size: sizeof(*request), GFP_KERNEL);
1336 if (!request)
1337 return -ENOMEM;
1338
1339 err = igt_live_test_begin(t: &t, i915, func: __func__, name: "");
1340 if (err)
1341 goto out_free;
1342
1343 idx = 0;
1344 for_each_uabi_engine(engine, i915) {
1345 struct i915_vma *batch;
1346
1347 batch = recursive_batch(gt: engine->gt);
1348 if (IS_ERR(ptr: batch)) {
1349 err = PTR_ERR(ptr: batch);
1350 pr_err("%s: Unable to create batch for %s, err=%d\n",
1351 __func__, engine->name, err);
1352 goto out_free;
1353 }
1354
1355 i915_vma_lock(vma: batch);
1356 request[idx] = intel_engine_create_kernel_request(engine);
1357 if (IS_ERR(ptr: request[idx])) {
1358 err = PTR_ERR(ptr: request[idx]);
1359 pr_err("%s: Request allocation failed for %s with err=%d\n",
1360 __func__, engine->name, err);
1361 goto out_unlock;
1362 }
1363 GEM_BUG_ON(request[idx]->context->vm != batch->vm);
1364
1365 if (prev) {
1366 err = i915_request_await_dma_fence(rq: request[idx],
1367 fence: &prev->fence);
1368 if (err) {
1369 i915_request_add(rq: request[idx]);
1370 pr_err("%s: Request await failed for %s with err=%d\n",
1371 __func__, engine->name, err);
1372 goto out_unlock;
1373 }
1374 }
1375
1376 err = i915_vma_move_to_active(vma: batch, rq: request[idx], flags: 0);
1377 GEM_BUG_ON(err);
1378
1379 err = emit_bb_start(rq: request[idx], batch);
1380 GEM_BUG_ON(err);
1381 request[idx]->batch = batch;
1382
1383 i915_request_get(rq: request[idx]);
1384 i915_request_add(rq: request[idx]);
1385
1386 prev = request[idx];
1387 idx++;
1388
1389out_unlock:
1390 i915_vma_unlock(vma: batch);
1391 if (err)
1392 goto out_request;
1393 }
1394
1395 idx = 0;
1396 for_each_uabi_engine(engine, i915) {
1397 long timeout;
1398
1399 if (i915_request_completed(rq: request[idx])) {
1400 pr_err("%s(%s): request completed too early!\n",
1401 __func__, engine->name);
1402 err = -EINVAL;
1403 goto out_request;
1404 }
1405
1406 err = recursive_batch_resolve(batch: request[idx]->batch);
1407 if (err) {
1408 pr_err("%s: failed to resolve batch, err=%d\n",
1409 __func__, err);
1410 goto out_request;
1411 }
1412
1413 timeout = i915_request_wait(rq: request[idx], flags: 0,
1414 MAX_SCHEDULE_TIMEOUT);
1415 if (timeout < 0) {
1416 err = timeout;
1417 pr_err("%s: error waiting for request on %s, err=%d\n",
1418 __func__, engine->name, err);
1419 goto out_request;
1420 }
1421
1422 GEM_BUG_ON(!i915_request_completed(request[idx]));
1423 idx++;
1424 }
1425
1426 err = igt_live_test_end(t: &t);
1427
1428out_request:
1429 idx = 0;
1430 for_each_uabi_engine(engine, i915) {
1431 u32 *cmd;
1432
1433 if (!request[idx])
1434 break;
1435
1436 cmd = i915_gem_object_pin_map_unlocked(obj: request[idx]->batch->obj,
1437 type: I915_MAP_WC);
1438 if (!IS_ERR(ptr: cmd)) {
1439 *cmd = MI_BATCH_BUFFER_END;
1440
1441 __i915_gem_object_flush_map(obj: request[idx]->batch->obj,
1442 offset: 0, size: sizeof(*cmd));
1443 i915_gem_object_unpin_map(obj: request[idx]->batch->obj);
1444
1445 intel_gt_chipset_flush(gt: engine->gt);
1446 }
1447
1448 i915_vma_put(vma: request[idx]->batch);
1449 i915_request_put(rq: request[idx]);
1450 idx++;
1451 }
1452out_free:
1453 kfree(objp: request);
1454 return err;
1455}
1456
1457struct parallel_thread {
1458 struct kthread_worker *worker;
1459 struct kthread_work work;
1460 struct intel_engine_cs *engine;
1461 int result;
1462};
1463
1464static void __live_parallel_engine1(struct kthread_work *work)
1465{
1466 struct parallel_thread *thread =
1467 container_of(work, typeof(*thread), work);
1468 struct intel_engine_cs *engine = thread->engine;
1469 IGT_TIMEOUT(end_time);
1470 unsigned long count;
1471 int err = 0;
1472
1473 count = 0;
1474 intel_engine_pm_get(engine);
1475 do {
1476 struct i915_request *rq;
1477
1478 rq = i915_request_create(ce: engine->kernel_context);
1479 if (IS_ERR(ptr: rq)) {
1480 err = PTR_ERR(ptr: rq);
1481 break;
1482 }
1483
1484 i915_request_get(rq);
1485 i915_request_add(rq);
1486
1487 err = 0;
1488 if (i915_request_wait(rq, flags: 0, HZ) < 0)
1489 err = -ETIME;
1490 i915_request_put(rq);
1491 if (err)
1492 break;
1493
1494 count++;
1495 } while (!__igt_timeout(timeout: end_time, NULL));
1496 intel_engine_pm_put(engine);
1497
1498 pr_info("%s: %lu request + sync\n", engine->name, count);
1499 thread->result = err;
1500}
1501
1502static void __live_parallel_engineN(struct kthread_work *work)
1503{
1504 struct parallel_thread *thread =
1505 container_of(work, typeof(*thread), work);
1506 struct intel_engine_cs *engine = thread->engine;
1507 IGT_TIMEOUT(end_time);
1508 unsigned long count;
1509 int err = 0;
1510
1511 count = 0;
1512 intel_engine_pm_get(engine);
1513 do {
1514 struct i915_request *rq;
1515
1516 rq = i915_request_create(ce: engine->kernel_context);
1517 if (IS_ERR(ptr: rq)) {
1518 err = PTR_ERR(ptr: rq);
1519 break;
1520 }
1521
1522 i915_request_add(rq);
1523 count++;
1524 } while (!__igt_timeout(timeout: end_time, NULL));
1525 intel_engine_pm_put(engine);
1526
1527 pr_info("%s: %lu requests\n", engine->name, count);
1528 thread->result = err;
1529}
1530
1531static bool wake_all(struct drm_i915_private *i915)
1532{
1533 if (atomic_dec_and_test(v: &i915->selftest.counter)) {
1534 wake_up_var(var: &i915->selftest.counter);
1535 return true;
1536 }
1537
1538 return false;
1539}
1540
1541static int wait_for_all(struct drm_i915_private *i915)
1542{
1543 if (wake_all(i915))
1544 return 0;
1545
1546 if (wait_var_event_timeout(&i915->selftest.counter,
1547 !atomic_read(&i915->selftest.counter),
1548 i915_selftest.timeout_jiffies))
1549 return 0;
1550
1551 return -ETIME;
1552}
1553
1554static void __live_parallel_spin(struct kthread_work *work)
1555{
1556 struct parallel_thread *thread =
1557 container_of(work, typeof(*thread), work);
1558 struct intel_engine_cs *engine = thread->engine;
1559 struct igt_spinner spin;
1560 struct i915_request *rq;
1561 int err = 0;
1562
1563 /*
1564 * Create a spinner running for eternity on each engine. If a second
1565 * spinner is incorrectly placed on the same engine, it will not be
1566 * able to start in time.
1567 */
1568
1569 if (igt_spinner_init(spin: &spin, gt: engine->gt)) {
1570 wake_all(i915: engine->i915);
1571 thread->result = -ENOMEM;
1572 return;
1573 }
1574
1575 intel_engine_pm_get(engine);
1576 rq = igt_spinner_create_request(spin: &spin,
1577 ce: engine->kernel_context,
1578 MI_NOOP); /* no preemption */
1579 intel_engine_pm_put(engine);
1580 if (IS_ERR(ptr: rq)) {
1581 err = PTR_ERR(ptr: rq);
1582 if (err == -ENODEV)
1583 err = 0;
1584 wake_all(i915: engine->i915);
1585 goto out_spin;
1586 }
1587
1588 i915_request_get(rq);
1589 i915_request_add(rq);
1590 if (igt_wait_for_spinner(spin: &spin, rq)) {
1591 /* Occupy this engine for the whole test */
1592 err = wait_for_all(i915: engine->i915);
1593 } else {
1594 pr_err("Failed to start spinner on %s\n", engine->name);
1595 err = -EINVAL;
1596 }
1597 igt_spinner_end(spin: &spin);
1598
1599 if (err == 0 && i915_request_wait(rq, flags: 0, HZ) < 0)
1600 err = -EIO;
1601 i915_request_put(rq);
1602
1603out_spin:
1604 igt_spinner_fini(spin: &spin);
1605 thread->result = err;
1606}
1607
1608static int live_parallel_engines(void *arg)
1609{
1610 struct drm_i915_private *i915 = arg;
1611 static void (* const func[])(struct kthread_work *) = {
1612 __live_parallel_engine1,
1613 __live_parallel_engineN,
1614 __live_parallel_spin,
1615 NULL,
1616 };
1617 const unsigned int nengines = num_uabi_engines(i915);
1618 struct parallel_thread *threads;
1619 struct intel_engine_cs *engine;
1620 void (* const *fn)(struct kthread_work *);
1621 int err = 0;
1622
1623 /*
1624 * Check we can submit requests to all engines concurrently. This
1625 * tests that we load up the system maximally.
1626 */
1627
1628 threads = kcalloc(n: nengines, size: sizeof(*threads), GFP_KERNEL);
1629 if (!threads)
1630 return -ENOMEM;
1631
1632 for (fn = func; !err && *fn; fn++) {
1633 char name[KSYM_NAME_LEN];
1634 struct igt_live_test t;
1635 unsigned int idx;
1636
1637 snprintf(buf: name, size: sizeof(name), fmt: "%ps", *fn);
1638 err = igt_live_test_begin(t: &t, i915, func: __func__, name);
1639 if (err)
1640 break;
1641
1642 atomic_set(v: &i915->selftest.counter, i: nengines);
1643
1644 idx = 0;
1645 for_each_uabi_engine(engine, i915) {
1646 struct kthread_worker *worker;
1647
1648 worker = kthread_create_worker(flags: 0, namefmt: "igt/parallel:%s",
1649 engine->name);
1650 if (IS_ERR(ptr: worker)) {
1651 err = PTR_ERR(ptr: worker);
1652 break;
1653 }
1654
1655 threads[idx].worker = worker;
1656 threads[idx].result = 0;
1657 threads[idx].engine = engine;
1658
1659 kthread_init_work(&threads[idx].work, *fn);
1660 kthread_queue_work(worker, work: &threads[idx].work);
1661 idx++;
1662 }
1663
1664 idx = 0;
1665 for_each_uabi_engine(engine, i915) {
1666 int status;
1667
1668 if (!threads[idx].worker)
1669 break;
1670
1671 kthread_flush_work(work: &threads[idx].work);
1672 status = READ_ONCE(threads[idx].result);
1673 if (status && !err)
1674 err = status;
1675
1676 kthread_destroy_worker(worker: threads[idx++].worker);
1677 }
1678
1679 if (igt_live_test_end(t: &t))
1680 err = -EIO;
1681 }
1682
1683 kfree(objp: threads);
1684 return err;
1685}
1686
1687static int
1688max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
1689{
1690 struct i915_request *rq;
1691 int ret;
1692
1693 /*
1694 * Before execlists, all contexts share the same ringbuffer. With
1695 * execlists, each context/engine has a separate ringbuffer and
1696 * for the purposes of this test, inexhaustible.
1697 *
1698 * For the global ringbuffer though, we have to be very careful
1699 * that we do not wrap while preventing the execution of requests
1700 * with a unsignaled fence.
1701 */
1702 if (HAS_EXECLISTS(ctx->i915))
1703 return INT_MAX;
1704
1705 rq = igt_request_alloc(ctx, engine);
1706 if (IS_ERR(ptr: rq)) {
1707 ret = PTR_ERR(ptr: rq);
1708 } else {
1709 int sz;
1710
1711 ret = rq->ring->size - rq->reserved_space;
1712 i915_request_add(rq);
1713
1714 sz = rq->ring->emit - rq->head;
1715 if (sz < 0)
1716 sz += rq->ring->size;
1717 ret /= sz;
1718 ret /= 2; /* leave half spare, in case of emergency! */
1719 }
1720
1721 return ret;
1722}
1723
1724static int live_breadcrumbs_smoketest(void *arg)
1725{
1726 struct drm_i915_private *i915 = arg;
1727 const unsigned int nengines = num_uabi_engines(i915);
1728 const unsigned int ncpus = /* saturate with nengines * ncpus */
1729 max_t(int, 2, DIV_ROUND_UP(num_online_cpus(), nengines));
1730 unsigned long num_waits, num_fences;
1731 struct intel_engine_cs *engine;
1732 struct smoke_thread *threads;
1733 struct igt_live_test live;
1734 intel_wakeref_t wakeref;
1735 struct smoketest *smoke;
1736 unsigned int n, idx;
1737 struct file *file;
1738 int ret = 0;
1739
1740 /*
1741 * Smoketest our breadcrumb/signal handling for requests across multiple
1742 * threads. A very simple test to only catch the most egregious of bugs.
1743 * See __igt_breadcrumbs_smoketest();
1744 *
1745 * On real hardware this time.
1746 */
1747
1748 wakeref = intel_runtime_pm_get(rpm: &i915->runtime_pm);
1749
1750 file = mock_file(i915);
1751 if (IS_ERR(ptr: file)) {
1752 ret = PTR_ERR(ptr: file);
1753 goto out_rpm;
1754 }
1755
1756 smoke = kcalloc(n: nengines, size: sizeof(*smoke), GFP_KERNEL);
1757 if (!smoke) {
1758 ret = -ENOMEM;
1759 goto out_file;
1760 }
1761
1762 threads = kcalloc(n: ncpus * nengines, size: sizeof(*threads), GFP_KERNEL);
1763 if (!threads) {
1764 ret = -ENOMEM;
1765 goto out_smoke;
1766 }
1767
1768 smoke[0].request_alloc = __live_request_alloc;
1769 smoke[0].ncontexts = 64;
1770 smoke[0].contexts = kcalloc(n: smoke[0].ncontexts,
1771 size: sizeof(*smoke[0].contexts),
1772 GFP_KERNEL);
1773 if (!smoke[0].contexts) {
1774 ret = -ENOMEM;
1775 goto out_threads;
1776 }
1777
1778 for (n = 0; n < smoke[0].ncontexts; n++) {
1779 smoke[0].contexts[n] = live_context(i915, file);
1780 if (IS_ERR(ptr: smoke[0].contexts[n])) {
1781 ret = PTR_ERR(ptr: smoke[0].contexts[n]);
1782 goto out_contexts;
1783 }
1784 }
1785
1786 ret = igt_live_test_begin(t: &live, i915, func: __func__, name: "");
1787 if (ret)
1788 goto out_contexts;
1789
1790 idx = 0;
1791 for_each_uabi_engine(engine, i915) {
1792 smoke[idx] = smoke[0];
1793 smoke[idx].engine = engine;
1794 smoke[idx].max_batch =
1795 max_batches(ctx: smoke[0].contexts[0], engine);
1796 if (smoke[idx].max_batch < 0) {
1797 ret = smoke[idx].max_batch;
1798 goto out_flush;
1799 }
1800 /* One ring interleaved between requests from all cpus */
1801 smoke[idx].max_batch /= ncpus + 1;
1802 pr_debug("Limiting batches to %d requests on %s\n",
1803 smoke[idx].max_batch, engine->name);
1804
1805 for (n = 0; n < ncpus; n++) {
1806 unsigned int i = idx * ncpus + n;
1807 struct kthread_worker *worker;
1808
1809 worker = kthread_create_worker(flags: 0, namefmt: "igt/%d.%d", idx, n);
1810 if (IS_ERR(ptr: worker)) {
1811 ret = PTR_ERR(ptr: worker);
1812 goto out_flush;
1813 }
1814
1815 threads[i].worker = worker;
1816 threads[i].t = &smoke[idx];
1817
1818 kthread_init_work(&threads[i].work,
1819 __igt_breadcrumbs_smoketest);
1820 kthread_queue_work(worker, work: &threads[i].work);
1821 }
1822
1823 idx++;
1824 }
1825
1826 msleep(msecs: jiffies_to_msecs(j: i915_selftest.timeout_jiffies));
1827
1828out_flush:
1829 idx = 0;
1830 num_waits = 0;
1831 num_fences = 0;
1832 for_each_uabi_engine(engine, i915) {
1833 for (n = 0; n < ncpus; n++) {
1834 unsigned int i = idx * ncpus + n;
1835 int err;
1836
1837 if (!threads[i].worker)
1838 continue;
1839
1840 WRITE_ONCE(threads[i].stop, true);
1841 kthread_flush_work(work: &threads[i].work);
1842 err = READ_ONCE(threads[i].result);
1843 if (err < 0 && !ret)
1844 ret = err;
1845
1846 kthread_destroy_worker(worker: threads[i].worker);
1847 }
1848
1849 num_waits += atomic_long_read(v: &smoke[idx].num_waits);
1850 num_fences += atomic_long_read(v: &smoke[idx].num_fences);
1851 idx++;
1852 }
1853 pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
1854 num_waits, num_fences, idx, ncpus);
1855
1856 ret = igt_live_test_end(t: &live) ?: ret;
1857out_contexts:
1858 kfree(objp: smoke[0].contexts);
1859out_threads:
1860 kfree(objp: threads);
1861out_smoke:
1862 kfree(objp: smoke);
1863out_file:
1864 fput(file);
1865out_rpm:
1866 intel_runtime_pm_put(rpm: &i915->runtime_pm, wref: wakeref);
1867
1868 return ret;
1869}
1870
1871int i915_request_live_selftests(struct drm_i915_private *i915)
1872{
1873 static const struct i915_subtest tests[] = {
1874 SUBTEST(live_nop_request),
1875 SUBTEST(live_all_engines),
1876 SUBTEST(live_sequential_engines),
1877 SUBTEST(live_parallel_engines),
1878 SUBTEST(live_empty_request),
1879 SUBTEST(live_cancel_request),
1880 SUBTEST(live_breadcrumbs_smoketest),
1881 };
1882
1883 if (intel_gt_is_wedged(gt: to_gt(i915)))
1884 return 0;
1885
1886 return i915_live_subtests(tests, i915);
1887}
1888
1889static int switch_to_kernel_sync(struct intel_context *ce, int err)
1890{
1891 struct i915_request *rq;
1892 struct dma_fence *fence;
1893
1894 rq = intel_engine_create_kernel_request(engine: ce->engine);
1895 if (IS_ERR(ptr: rq))
1896 return PTR_ERR(ptr: rq);
1897
1898 fence = i915_active_fence_get(active: &ce->timeline->last_request);
1899 if (fence) {
1900 i915_request_await_dma_fence(rq, fence);
1901 dma_fence_put(fence);
1902 }
1903
1904 rq = i915_request_get(rq);
1905 i915_request_add(rq);
1906 if (i915_request_wait(rq, flags: 0, HZ / 2) < 0 && !err)
1907 err = -ETIME;
1908 i915_request_put(rq);
1909
1910 while (!err && !intel_engine_is_idle(engine: ce->engine))
1911 intel_engine_flush_submission(engine: ce->engine);
1912
1913 return err;
1914}
1915
1916struct perf_stats {
1917 struct intel_engine_cs *engine;
1918 unsigned long count;
1919 ktime_t time;
1920 ktime_t busy;
1921 u64 runtime;
1922};
1923
1924struct perf_series {
1925 struct drm_i915_private *i915;
1926 unsigned int nengines;
1927 struct intel_context *ce[] __counted_by(nengines);
1928};
1929
1930static int cmp_u32(const void *A, const void *B)
1931{
1932 const u32 *a = A, *b = B;
1933
1934 return *a - *b;
1935}
1936
1937static u32 trifilter(u32 *a)
1938{
1939 u64 sum;
1940
1941#define TF_COUNT 5
1942 sort(base: a, TF_COUNT, size: sizeof(*a), cmp_func: cmp_u32, NULL);
1943
1944 sum = mul_u32_u32(a: a[2], b: 2);
1945 sum += a[1];
1946 sum += a[3];
1947
1948 GEM_BUG_ON(sum > U32_MAX);
1949 return sum;
1950#define TF_BIAS 2
1951}
1952
1953static u64 cycles_to_ns(struct intel_engine_cs *engine, u32 cycles)
1954{
1955 u64 ns = intel_gt_clock_interval_to_ns(gt: engine->gt, count: cycles);
1956
1957 return DIV_ROUND_CLOSEST(ns, 1 << TF_BIAS);
1958}
1959
1960static u32 *emit_timestamp_store(u32 *cs, struct intel_context *ce, u32 offset)
1961{
1962 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1963 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP((ce->engine->mmio_base)));
1964 *cs++ = offset;
1965 *cs++ = 0;
1966
1967 return cs;
1968}
1969
1970static u32 *emit_store_dw(u32 *cs, u32 offset, u32 value)
1971{
1972 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1973 *cs++ = offset;
1974 *cs++ = 0;
1975 *cs++ = value;
1976
1977 return cs;
1978}
1979
1980static u32 *emit_semaphore_poll(u32 *cs, u32 mode, u32 value, u32 offset)
1981{
1982 *cs++ = MI_SEMAPHORE_WAIT |
1983 MI_SEMAPHORE_GLOBAL_GTT |
1984 MI_SEMAPHORE_POLL |
1985 mode;
1986 *cs++ = value;
1987 *cs++ = offset;
1988 *cs++ = 0;
1989
1990 return cs;
1991}
1992
1993static u32 *emit_semaphore_poll_until(u32 *cs, u32 offset, u32 value)
1994{
1995 return emit_semaphore_poll(cs, MI_SEMAPHORE_SAD_EQ_SDD, value, offset);
1996}
1997
1998static void semaphore_set(u32 *sema, u32 value)
1999{
2000 WRITE_ONCE(*sema, value);
2001 wmb(); /* flush the update to the cache, and beyond */
2002}
2003
2004static u32 *hwsp_scratch(const struct intel_context *ce)
2005{
2006 return memset32(s: ce->engine->status_page.addr + 1000, v: 0, n: 21);
2007}
2008
2009static u32 hwsp_offset(const struct intel_context *ce, u32 *dw)
2010{
2011 return (i915_ggtt_offset(vma: ce->engine->status_page.vma) +
2012 offset_in_page(dw));
2013}
2014
2015static int measure_semaphore_response(struct intel_context *ce)
2016{
2017 u32 *sema = hwsp_scratch(ce);
2018 const u32 offset = hwsp_offset(ce, dw: sema);
2019 u32 elapsed[TF_COUNT], cycles;
2020 struct i915_request *rq;
2021 u32 *cs;
2022 int err;
2023 int i;
2024
2025 /*
2026 * Measure how many cycles it takes for the HW to detect the change
2027 * in a semaphore value.
2028 *
2029 * A: read CS_TIMESTAMP from CPU
2030 * poke semaphore
2031 * B: read CS_TIMESTAMP on GPU
2032 *
2033 * Semaphore latency: B - A
2034 */
2035
2036 semaphore_set(sema, value: -1);
2037
2038 rq = i915_request_create(ce);
2039 if (IS_ERR(ptr: rq))
2040 return PTR_ERR(ptr: rq);
2041
2042 cs = intel_ring_begin(rq, num_dwords: 4 + 12 * ARRAY_SIZE(elapsed));
2043 if (IS_ERR(ptr: cs)) {
2044 i915_request_add(rq);
2045 err = PTR_ERR(ptr: cs);
2046 goto err;
2047 }
2048
2049 cs = emit_store_dw(cs, offset, value: 0);
2050 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2051 cs = emit_semaphore_poll_until(cs, offset, value: i);
2052 cs = emit_timestamp_store(cs, ce, offset: offset + i * sizeof(u32));
2053 cs = emit_store_dw(cs, offset, value: 0);
2054 }
2055
2056 intel_ring_advance(rq, cs);
2057 i915_request_add(rq);
2058
2059 if (wait_for(READ_ONCE(*sema) == 0, 50)) {
2060 err = -EIO;
2061 goto err;
2062 }
2063
2064 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2065 preempt_disable();
2066 cycles = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
2067 semaphore_set(sema, value: i);
2068 preempt_enable();
2069
2070 if (wait_for(READ_ONCE(*sema) == 0, 50)) {
2071 err = -EIO;
2072 goto err;
2073 }
2074
2075 elapsed[i - 1] = sema[i] - cycles;
2076 }
2077
2078 cycles = trifilter(a: elapsed);
2079 pr_info("%s: semaphore response %d cycles, %lluns\n",
2080 ce->engine->name, cycles >> TF_BIAS,
2081 cycles_to_ns(ce->engine, cycles));
2082
2083 return intel_gt_wait_for_idle(gt: ce->engine->gt, HZ);
2084
2085err:
2086 intel_gt_set_wedged(gt: ce->engine->gt);
2087 return err;
2088}
2089
static int measure_idle_dispatch(struct intel_context *ce)
{
	u32 *sema = hwsp_scratch(ce);
	const u32 offset = hwsp_offset(ce, sema);
	u32 elapsed[TF_COUNT], cycles;
	u32 *cs;
	int err;
	int i;

	/*
	 * Measure how long it takes for us to submit a request while the
	 * engine is idle, but is resting in our context.
	 *
	 * A: read CS_TIMESTAMP from CPU
	 *    submit request
	 * B: read CS_TIMESTAMP on GPU
	 *
	 * Submission latency: B - A
	 */

	for (i = 0; i < ARRAY_SIZE(elapsed); i++) {
		struct i915_request *rq;

		err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
		if (err)
			return err;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		cs = intel_ring_begin(rq, 4);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err;
		}

		cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));

		intel_ring_advance(rq, cs);

		preempt_disable();
		local_bh_disable();
		elapsed[i] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
		i915_request_add(rq);
		local_bh_enable();
		preempt_enable();
	}

	err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
	if (err)
		goto err;

	for (i = 0; i < ARRAY_SIZE(elapsed); i++)
		elapsed[i] = sema[i] - elapsed[i];

	cycles = trifilter(elapsed);
	pr_info("%s: idle dispatch latency %d cycles, %lluns\n",
		ce->engine->name, cycles >> TF_BIAS,
		cycles_to_ns(ce->engine, cycles));

	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err:
	intel_gt_set_wedged(ce->engine->gt);
	return err;
}

static int measure_busy_dispatch(struct intel_context *ce)
{
	u32 *sema = hwsp_scratch(ce);
	const u32 offset = hwsp_offset(ce, sema);
	u32 elapsed[TF_COUNT + 1], cycles;
	u32 *cs;
	int err;
	int i;

	/*
	 * Measure how long it takes for us to submit a request while the
	 * engine is busy, polling on a semaphore in our context. With
	 * direct submission, this will include the cost of a lite restore.
	 *
	 * A: read CS_TIMESTAMP from CPU
	 *    submit request
	 * B: read CS_TIMESTAMP on GPU
	 *
	 * Submission latency: B - A
	 */

	for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		cs = intel_ring_begin(rq, 12);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err;
		}

		cs = emit_store_dw(cs, offset + i * sizeof(u32), -1);
		cs = emit_semaphore_poll_until(cs, offset, i);
		cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));

		intel_ring_advance(rq, cs);

		if (i > 1 && wait_for(READ_ONCE(sema[i - 1]), 500)) {
			err = -EIO;
			goto err;
		}

		preempt_disable();
		local_bh_disable();
		elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
		i915_request_add(rq);
		local_bh_enable();
		semaphore_set(sema, i - 1);
		preempt_enable();
	}

	wait_for(READ_ONCE(sema[i - 1]), 500);
	semaphore_set(sema, i - 1);

	for (i = 1; i <= TF_COUNT; i++) {
		GEM_BUG_ON(sema[i] == -1);
		elapsed[i - 1] = sema[i] - elapsed[i];
	}

	cycles = trifilter(elapsed);
	pr_info("%s: busy dispatch latency %d cycles, %lluns\n",
		ce->engine->name, cycles >> TF_BIAS,
		cycles_to_ns(ce->engine, cycles));

	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err:
	intel_gt_set_wedged(ce->engine->gt);
	return err;
}

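/*
 * Block the engine behind a request on its kernel context that waits on a
 * semaphore in the status page; the plug is released once the semaphore
 * comparison is satisfied, e.g. by a later semaphore_set().
 */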
static int plug(struct intel_engine_cs *engine, u32 *sema, u32 mode, int value)
{
	const u32 offset =
		i915_ggtt_offset(engine->status_page.vma) +
		offset_in_page(sema);
	struct i915_request *rq;
	u32 *cs;

	rq = i915_request_create(engine->kernel_context);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	cs = emit_semaphore_poll(cs, mode, value, offset);

	intel_ring_advance(rq, cs);
	i915_request_add(rq);

	return 0;
}

static int measure_inter_request(struct intel_context *ce)
{
	u32 *sema = hwsp_scratch(ce);
	const u32 offset = hwsp_offset(ce, sema);
	u32 elapsed[TF_COUNT + 1], cycles;
	struct i915_sw_fence *submit;
	int i, err;

	/*
	 * Measure how long it takes to advance from one request into the
	 * next. Between each request we flush the GPU caches to memory,
	 * update the breadcrumbs, and then invalidate those caches.
	 * We queue up all the requests to be submitted in one batch so
	 * it should be one set of contiguous measurements.
	 *
	 * A: read CS_TIMESTAMP on GPU
	 *    advance request
	 * B: read CS_TIMESTAMP on GPU
	 *
	 * Request latency: B - A
	 */

	err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0);
	if (err)
		return err;

	submit = heap_fence_create(GFP_KERNEL);
	if (!submit) {
		semaphore_set(sema, 1);
		return -ENOMEM;
	}

	intel_engine_flush_submission(ce->engine);
	for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
		struct i915_request *rq;
		u32 *cs;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_submit;
		}

		err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
						       submit,
						       GFP_KERNEL);
		if (err < 0) {
			i915_request_add(rq);
			goto err_submit;
		}

		cs = intel_ring_begin(rq, 4);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err_submit;
		}

		cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));

		intel_ring_advance(rq, cs);
		i915_request_add(rq);
	}
	i915_sw_fence_commit(submit);
	intel_engine_flush_submission(ce->engine);
	heap_fence_put(submit);

	semaphore_set(sema, 1);
	err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
	if (err)
		goto err;

	for (i = 1; i <= TF_COUNT; i++)
		elapsed[i - 1] = sema[i + 1] - sema[i];

	cycles = trifilter(elapsed);
	pr_info("%s: inter-request latency %d cycles, %lluns\n",
		ce->engine->name, cycles >> TF_BIAS,
		cycles_to_ns(ce->engine, cycles));

	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err_submit:
	i915_sw_fence_commit(submit);
	heap_fence_put(submit);
	semaphore_set(sema, 1);
err:
	intel_gt_set_wedged(ce->engine->gt);
	return err;
}

static int measure_context_switch(struct intel_context *ce)
{
	u32 *sema = hwsp_scratch(ce);
	const u32 offset = hwsp_offset(ce, sema);
	struct i915_request *fence = NULL;
	u32 elapsed[TF_COUNT + 1], cycles;
	int i, j, err;
	u32 *cs;

	/*
	 * Measure how long it takes to advance from one request in one
	 * context to a request in another context. This allows us to
	 * measure how long the context save/restore take, along with all
	 * the inter-context setup we require.
	 *
	 * A: read CS_TIMESTAMP on GPU
	 *    switch context
	 * B: read CS_TIMESTAMP on GPU
	 *
	 * Context switch latency: B - A
	 */

	err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0);
	if (err)
		return err;

	for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
		struct intel_context *arr[] = {
			ce, ce->engine->kernel_context
		};
		u32 addr = offset + ARRAY_SIZE(arr) * i * sizeof(u32);

		for (j = 0; j < ARRAY_SIZE(arr); j++) {
			struct i915_request *rq;

			rq = i915_request_create(arr[j]);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto err_fence;
			}

			if (fence) {
				err = i915_request_await_dma_fence(rq,
								   &fence->fence);
				if (err) {
					i915_request_add(rq);
					goto err_fence;
				}
			}

			cs = intel_ring_begin(rq, 4);
			if (IS_ERR(cs)) {
				i915_request_add(rq);
				err = PTR_ERR(cs);
				goto err_fence;
			}

			cs = emit_timestamp_store(cs, ce, addr);
			addr += sizeof(u32);

			intel_ring_advance(rq, cs);

			i915_request_put(fence);
			fence = i915_request_get(rq);

			i915_request_add(rq);
		}
	}
	i915_request_put(fence);
	intel_engine_flush_submission(ce->engine);

	semaphore_set(sema, 1);
	err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
	if (err)
		goto err;

	for (i = 1; i <= TF_COUNT; i++)
		elapsed[i - 1] = sema[2 * i + 2] - sema[2 * i + 1];

	cycles = trifilter(elapsed);
	pr_info("%s: context switch latency %d cycles, %lluns\n",
		ce->engine->name, cycles >> TF_BIAS,
		cycles_to_ns(ce->engine, cycles));

	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err_fence:
	i915_request_put(fence);
	semaphore_set(sema, 1);
err:
	intel_gt_set_wedged(ce->engine->gt);
	return err;
}

static int measure_preemption(struct intel_context *ce)
{
	u32 *sema = hwsp_scratch(ce);
	const u32 offset = hwsp_offset(ce, sema);
	u32 elapsed[TF_COUNT], cycles;
	u32 *cs;
	int err;
	int i;

	/*
	 * We measure two latencies while triggering preemption. The first
	 * latency is how long it takes for us to submit a preempting request.
	 * The second latency is how long it takes for us to return from the
	 * preemption back to the original context.
	 *
	 * A: read CS_TIMESTAMP from CPU
	 *    submit preemption
	 * B: read CS_TIMESTAMP on GPU (in preempting context)
	 *    context switch
	 * C: read CS_TIMESTAMP on GPU (in original context)
	 *
	 * Preemption dispatch latency: B - A
	 * Preemption switch latency: C - B
	 */

	if (!intel_engine_has_preemption(ce->engine))
		return 0;

	for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
		u32 addr = offset + 2 * i * sizeof(u32);
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		cs = intel_ring_begin(rq, 12);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err;
		}

		cs = emit_store_dw(cs, addr, -1);
		cs = emit_semaphore_poll_until(cs, offset, i);
		cs = emit_timestamp_store(cs, ce, addr + sizeof(u32));

		intel_ring_advance(rq, cs);
		i915_request_add(rq);

		if (wait_for(READ_ONCE(sema[2 * i]) == -1, 500)) {
			err = -EIO;
			goto err;
		}

		rq = i915_request_create(ce->engine->kernel_context);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		cs = intel_ring_begin(rq, 8);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err;
		}

		cs = emit_timestamp_store(cs, ce, addr);
		cs = emit_store_dw(cs, offset, i);

		intel_ring_advance(rq, cs);
		rq->sched.attr.priority = I915_PRIORITY_BARRIER;

		elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
		i915_request_add(rq);
	}

	if (wait_for(READ_ONCE(sema[2 * i - 2]) != -1, 500)) {
		err = -EIO;
		goto err;
	}

	for (i = 1; i <= TF_COUNT; i++)
		elapsed[i - 1] = sema[2 * i + 0] - elapsed[i - 1];

	cycles = trifilter(elapsed);
	pr_info("%s: preemption dispatch latency %d cycles, %lluns\n",
		ce->engine->name, cycles >> TF_BIAS,
		cycles_to_ns(ce->engine, cycles));

	for (i = 1; i <= TF_COUNT; i++)
		elapsed[i - 1] = sema[2 * i + 1] - sema[2 * i + 0];

	cycles = trifilter(elapsed);
	pr_info("%s: preemption switch latency %d cycles, %lluns\n",
		ce->engine->name, cycles >> TF_BIAS,
		cycles_to_ns(ce->engine, cycles));

	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err:
	intel_gt_set_wedged(ce->engine->gt);
	return err;
}

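/*
 * A dma-fence callback used to note, from the CPU, the moment a request is
 * signalled as completed.
 */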
struct signal_cb {
	struct dma_fence_cb base;
	bool seen;
};

static void signal_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct signal_cb *s = container_of(cb, typeof(*s), base);

	smp_store_mb(s->seen, true); /* be safe, be strong */
}

static int measure_completion(struct intel_context *ce)
{
	u32 *sema = hwsp_scratch(ce);
	const u32 offset = hwsp_offset(ce, sema);
	u32 elapsed[TF_COUNT], cycles;
	u32 *cs;
	int err;
	int i;

	/*
	 * Measure how long it takes for the signal (interrupt) sent by the
	 * GPU to be processed by the CPU.
	 *
	 * A: read CS_TIMESTAMP on GPU
	 *    signal
	 * B: read CS_TIMESTAMP from CPU
	 *
	 * Completion latency: B - A
	 */

	for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
		struct signal_cb cb = { .seen = false };
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		cs = intel_ring_begin(rq, 12);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err;
		}

		cs = emit_store_dw(cs, offset + i * sizeof(u32), -1);
		cs = emit_semaphore_poll_until(cs, offset, i);
		cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));

		intel_ring_advance(rq, cs);

		dma_fence_add_callback(&rq->fence, &cb.base, signal_cb);
		i915_request_add(rq);

		intel_engine_flush_submission(ce->engine);
		if (wait_for(READ_ONCE(sema[i]) == -1, 50)) {
			err = -EIO;
			goto err;
		}

		preempt_disable();
		semaphore_set(sema, i);
		while (!READ_ONCE(cb.seen))
			cpu_relax();

		elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
		preempt_enable();
	}

	err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
	if (err)
		goto err;

	for (i = 0; i < ARRAY_SIZE(elapsed); i++) {
		GEM_BUG_ON(sema[i + 1] == -1);
		elapsed[i] = elapsed[i] - sema[i + 1];
	}

	cycles = trifilter(elapsed);
	pr_info("%s: completion latency %d cycles, %lluns\n",
		ce->engine->name, cycles >> TF_BIAS,
		cycles_to_ns(ce->engine, cycles));

	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err:
	intel_gt_set_wedged(ce->engine->gt);
	return err;
}

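/*
 * Pin the GPU frequency to its maximum (holding forcewake) so that RPS
 * frequency changes do not skew the latency measurements mid-test.
 */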
static void rps_pin(struct intel_gt *gt)
{
	/* Pin the frequency to max */
	atomic_inc(&gt->rps.num_waiters);
	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

	mutex_lock(&gt->rps.lock);
	intel_rps_set(&gt->rps, gt->rps.max_freq);
	mutex_unlock(&gt->rps.lock);
}

static void rps_unpin(struct intel_gt *gt)
{
	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
	atomic_dec(&gt->rps.num_waiters);
}

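/*
 * Run the individual latency probes above on each uabi engine, with the
 * heartbeat disabled and the frequency pinned, so that every measurement
 * is taken under quiet, repeatable conditions.
 */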
static int perf_request_latency(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	int err = 0;

	if (GRAPHICS_VER(i915) < 8) /* per-engine CS timestamp, semaphores */
		return 0;

	cpu_latency_qos_add_request(&qos, 0); /* disable cstates */

	for_each_uabi_engine(engine, i915) {
		struct intel_context *ce;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			goto out;
		}

		st_engine_heartbeat_disable(engine);
		rps_pin(engine->gt);

		if (err == 0)
			err = measure_semaphore_response(ce);
		if (err == 0)
			err = measure_idle_dispatch(ce);
		if (err == 0)
			err = measure_busy_dispatch(ce);
		if (err == 0)
			err = measure_inter_request(ce);
		if (err == 0)
			err = measure_context_switch(ce);
		if (err == 0)
			err = measure_preemption(ce);
		if (err == 0)
			err = measure_completion(ce);

		rps_unpin(engine->gt);
		st_engine_heartbeat_enable(engine);

		intel_context_unpin(ce);
		intel_context_put(ce);
		if (err)
			goto out;
	}

out:
	if (igt_flush_test(i915))
		err = -EIO;

	cpu_latency_qos_remove_request(&qos);
	return err;
}

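/*
 * s_sync0: submit to each context in turn and wait for the request to
 * complete before moving on; only one request is ever in flight.
 */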
static int s_sync0(void *arg)
{
	struct perf_series *ps = arg;
	IGT_TIMEOUT(end_time);
	unsigned int idx = 0;
	int err = 0;

	GEM_BUG_ON(!ps->nengines);
	do {
		struct i915_request *rq;

		rq = i915_request_create(ps->ce[idx]);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			break;

		if (++idx == ps->nengines)
			idx = 0;
	} while (!__igt_timeout(end_time, NULL));

	return err;
}

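/*
 * s_sync1: keep one request in flight; each new submission waits on the
 * previous request rather than its own.
 */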
static int s_sync1(void *arg)
{
	struct perf_series *ps = arg;
	struct i915_request *prev = NULL;
	IGT_TIMEOUT(end_time);
	unsigned int idx = 0;
	int err = 0;

	GEM_BUG_ON(!ps->nengines);
	do {
		struct i915_request *rq;

		rq = i915_request_create(ps->ce[idx]);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(prev);
		prev = rq;
		if (err)
			break;

		if (++idx == ps->nengines)
			idx = 0;
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(prev);

	return err;
}

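/*
 * s_many: submit requests across the contexts as fast as possible, never
 * waiting, to measure pure submission throughput.
 */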
static int s_many(void *arg)
{
	struct perf_series *ps = arg;
	IGT_TIMEOUT(end_time);
	unsigned int idx = 0;

	GEM_BUG_ON(!ps->nengines);
	do {
		struct i915_request *rq;

		rq = i915_request_create(ps->ce[idx]);
		if (IS_ERR(rq))
			return PTR_ERR(rq);

		i915_request_add(rq);

		if (++idx == ps->nengines)
			idx = 0;
	} while (!__igt_timeout(end_time, NULL));

	return 0;
}

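/*
 * Run each submission pattern (s_sync0, s_sync1, s_many) from a single
 * thread over one context per engine, then report per-engine busyness,
 * runtime and walltime.
 */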
static int perf_series_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		s_sync0,
		s_sync1,
		s_many,
		NULL,
	};
	const unsigned int nengines = num_uabi_engines(i915);
	struct intel_engine_cs *engine;
	int (* const *fn)(void *arg);
	struct pm_qos_request qos;
	struct perf_stats *stats;
	struct perf_series *ps;
	unsigned int idx;
	int err = 0;

	stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL);
	if (!stats)
		return -ENOMEM;

	ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
	if (!ps) {
		kfree(stats);
		return -ENOMEM;
	}

	cpu_latency_qos_add_request(&qos, 0); /* disable cstates */

	ps->i915 = i915;
	ps->nengines = nengines;

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		struct intel_context *ce;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			goto out;
		}

		ps->ce[idx++] = ce;
	}
	GEM_BUG_ON(idx != ps->nengines);

	for (fn = func; *fn && !err; fn++) {
		char name[KSYM_NAME_LEN];
		struct igt_live_test t;

		snprintf(name, sizeof(name), "%ps", *fn);
		err = igt_live_test_begin(&t, i915, __func__, name);
		if (err)
			break;

		for (idx = 0; idx < nengines; idx++) {
			struct perf_stats *p =
				memset(&stats[idx], 0, sizeof(stats[idx]));
			struct intel_context *ce = ps->ce[idx];

			p->engine = ps->ce[idx]->engine;
			intel_engine_pm_get(p->engine);

			if (intel_engine_supports_stats(p->engine))
				p->busy = intel_engine_get_busy_time(p->engine,
								     &p->time) + 1;
			else
				p->time = ktime_get();
			p->runtime = -intel_context_get_total_runtime_ns(ce);
		}

		err = (*fn)(ps);
		if (igt_live_test_end(&t))
			err = -EIO;

		for (idx = 0; idx < nengines; idx++) {
			struct perf_stats *p = &stats[idx];
			struct intel_context *ce = ps->ce[idx];
			int integer, decimal;
			u64 busy, dt, now;

			if (p->busy)
				p->busy = ktime_sub(intel_engine_get_busy_time(p->engine,
									       &now),
						    p->busy - 1);
			else
				now = ktime_get();
			p->time = ktime_sub(now, p->time);

			err = switch_to_kernel_sync(ce, err);
			p->runtime += intel_context_get_total_runtime_ns(ce);
			intel_engine_pm_put(p->engine);

			busy = 100 * ktime_to_ns(p->busy);
			dt = ktime_to_ns(p->time);
			if (dt) {
				integer = div64_u64(busy, dt);
				busy -= integer * dt;
				decimal = div64_u64(100 * busy, dt);
			} else {
				integer = 0;
				decimal = 0;
			}

			pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
				name, p->engine->name, ce->timeline->seqno,
				integer, decimal,
				div_u64(p->runtime, 1000 * 1000),
				div_u64(ktime_to_ns(p->time), 1000 * 1000));
		}
	}

out:
	for (idx = 0; idx < nengines; idx++) {
		if (IS_ERR_OR_NULL(ps->ce[idx]))
			break;

		intel_context_unpin(ps->ce[idx]);
		intel_context_put(ps->ce[idx]);
	}
	kfree(ps);

	cpu_latency_qos_remove_request(&qos);
	kfree(stats);
	return err;
}

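/* Per-engine kthread worker state for the parallel submission tests. */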
struct p_thread {
	struct perf_stats p;
	struct kthread_worker *worker;
	struct kthread_work work;
	struct intel_engine_cs *engine;
	int result;
};

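/*
 * p_sync0: per-engine worker equivalent of s_sync0; wait for each request
 * to complete before submitting the next on this engine.
 */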
static void p_sync0(struct kthread_work *work)
{
	struct p_thread *thread = container_of(work, typeof(*thread), work);
	struct perf_stats *p = &thread->p;
	struct intel_engine_cs *engine = p->engine;
	struct intel_context *ce;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	bool busy;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce)) {
		thread->result = PTR_ERR(ce);
		return;
	}

	err = intel_context_pin(ce);
	if (err) {
		intel_context_put(ce);
		thread->result = err;
		return;
	}

	if (intel_engine_supports_stats(engine)) {
		p->busy = intel_engine_get_busy_time(engine, &p->time);
		busy = true;
	} else {
		p->time = ktime_get();
		busy = false;
	}

	count = 0;
	do {
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		err = 0;
		if (i915_request_wait(rq, 0, HZ) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			break;

		count++;
	} while (!__igt_timeout(end_time, NULL));

	if (busy) {
		ktime_t now;

		p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now),
				    p->busy);
		p->time = ktime_sub(now, p->time);
	} else {
		p->time = ktime_sub(ktime_get(), p->time);
	}

	err = switch_to_kernel_sync(ce, err);
	p->runtime = intel_context_get_total_runtime_ns(ce);
	p->count = count;

	intel_context_unpin(ce);
	intel_context_put(ce);
	thread->result = err;
}

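/*
 * p_sync1: per-engine worker equivalent of s_sync1; keep one request in
 * flight, waiting on the previous request after queueing its successor.
 */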
static void p_sync1(struct kthread_work *work)
{
	struct p_thread *thread = container_of(work, typeof(*thread), work);
	struct perf_stats *p = &thread->p;
	struct intel_engine_cs *engine = p->engine;
	struct i915_request *prev = NULL;
	struct intel_context *ce;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	bool busy;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce)) {
		thread->result = PTR_ERR(ce);
		return;
	}

	err = intel_context_pin(ce);
	if (err) {
		intel_context_put(ce);
		thread->result = err;
		return;
	}

	if (intel_engine_supports_stats(engine)) {
		p->busy = intel_engine_get_busy_time(engine, &p->time);
		busy = true;
	} else {
		p->time = ktime_get();
		busy = false;
	}

	count = 0;
	do {
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		err = 0;
		if (prev && i915_request_wait(prev, 0, HZ) < 0)
			err = -ETIME;
		i915_request_put(prev);
		prev = rq;
		if (err)
			break;

		count++;
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(prev);

	if (busy) {
		ktime_t now;

		p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now),
				    p->busy);
		p->time = ktime_sub(now, p->time);
	} else {
		p->time = ktime_sub(ktime_get(), p->time);
	}

	err = switch_to_kernel_sync(ce, err);
	p->runtime = intel_context_get_total_runtime_ns(ce);
	p->count = count;

	intel_context_unpin(ce);
	intel_context_put(ce);
	thread->result = err;
}

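/*
 * p_many: per-engine worker equivalent of s_many; submit without waiting
 * and count how many requests can be submitted within the timeout.
 */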
static void p_many(struct kthread_work *work)
{
	struct p_thread *thread = container_of(work, typeof(*thread), work);
	struct perf_stats *p = &thread->p;
	struct intel_engine_cs *engine = p->engine;
	struct intel_context *ce;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int err = 0;
	bool busy;

	ce = intel_context_create(engine);
	if (IS_ERR(ce)) {
		thread->result = PTR_ERR(ce);
		return;
	}

	err = intel_context_pin(ce);
	if (err) {
		intel_context_put(ce);
		thread->result = err;
		return;
	}

	if (intel_engine_supports_stats(engine)) {
		p->busy = intel_engine_get_busy_time(engine, &p->time);
		busy = true;
	} else {
		p->time = ktime_get();
		busy = false;
	}

	count = 0;
	do {
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);
		count++;
	} while (!__igt_timeout(end_time, NULL));

	if (busy) {
		ktime_t now;

		p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now),
				    p->busy);
		p->time = ktime_sub(now, p->time);
	} else {
		p->time = ktime_sub(ktime_get(), p->time);
	}

	err = switch_to_kernel_sync(ce, err);
	p->runtime = intel_context_get_total_runtime_ns(ce);
	p->count = count;

	intel_context_unpin(ce);
	intel_context_put(ce);
	thread->result = err;
}

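/*
 * Run each submission pattern on all engines in parallel, one kthread
 * worker per engine, to see how submission scales when every engine is
 * being fed at once.
 */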
static int perf_parallel_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static void (* const func[])(struct kthread_work *) = {
		p_sync0,
		p_sync1,
		p_many,
		NULL,
	};
	const unsigned int nengines = num_uabi_engines(i915);
	void (* const *fn)(struct kthread_work *);
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	struct p_thread *engines;
	int err = 0;

	engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
	if (!engines)
		return -ENOMEM;

	cpu_latency_qos_add_request(&qos, 0);

	for (fn = func; *fn; fn++) {
		char name[KSYM_NAME_LEN];
		struct igt_live_test t;
		unsigned int idx;

		snprintf(name, sizeof(name), "%ps", *fn);
		err = igt_live_test_begin(&t, i915, __func__, name);
		if (err)
			break;

		atomic_set(&i915->selftest.counter, nengines);

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			struct kthread_worker *worker;

			intel_engine_pm_get(engine);

			memset(&engines[idx].p, 0, sizeof(engines[idx].p));

			worker = kthread_create_worker(0, "igt:%s",
						       engine->name);
			if (IS_ERR(worker)) {
				err = PTR_ERR(worker);
				intel_engine_pm_put(engine);
				break;
			}
			engines[idx].worker = worker;
			engines[idx].result = 0;
			engines[idx].p.engine = engine;
			engines[idx].engine = engine;

			kthread_init_work(&engines[idx].work, *fn);
			kthread_queue_work(worker, &engines[idx].work);
			idx++;
		}

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			int status;

			if (!engines[idx].worker)
				break;

			kthread_flush_work(&engines[idx].work);
			status = READ_ONCE(engines[idx].result);
			if (status && !err)
				err = status;

			intel_engine_pm_put(engine);

			kthread_destroy_worker(engines[idx].worker);
			idx++;
		}

		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			struct perf_stats *p = &engines[idx].p;
			u64 busy = 100 * ktime_to_ns(p->busy);
			u64 dt = ktime_to_ns(p->time);
			int integer, decimal;

			if (dt) {
				integer = div64_u64(busy, dt);
				busy -= integer * dt;
				decimal = div64_u64(100 * busy, dt);
			} else {
				integer = 0;
				decimal = 0;
			}

			GEM_BUG_ON(engine != p->engine);
			pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
				name, engine->name, p->count, integer, decimal,
				div_u64(p->runtime, 1000 * 1000),
				div_u64(ktime_to_ns(p->time), 1000 * 1000));
			idx++;
		}
	}

	cpu_latency_qos_remove_request(&qos);
	kfree(engines);
	return err;
}

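/* Entry point for the request perf selftests; skipped if the GT is wedged. */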
int i915_request_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_request_latency),
		SUBTEST(perf_series_engines),
		SUBTEST(perf_parallel_engines),
	};

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	return i915_subtests(tests, i915);
}