/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/kref.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_gt.h"

#include "i915_selftest.h"

#include "igt_flush_test.h"
#include "lib_sw_fence.h"

#define TEST_OA_CONFIG_UUID "12345678-1234-1234-1234-1234567890ab"

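/*
 * Register an empty OA config under a well-known test UUID so the
 * selftests below can open a perf stream against it. IDs are allocated
 * starting at 2, leaving the lower ids reserved.
 */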
static int
alloc_empty_config(struct i915_perf *perf)
{
	struct i915_oa_config *oa_config;

	oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
	if (!oa_config)
		return -ENOMEM;

	oa_config->perf = perf;
	kref_init(&oa_config->ref);

	strscpy(oa_config->uuid, TEST_OA_CONFIG_UUID, sizeof(oa_config->uuid));

	mutex_lock(&perf->metrics_lock);

	oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL);
	if (oa_config->id < 0) {
		mutex_unlock(&perf->metrics_lock);
		i915_oa_config_put(oa_config);
		return -ENOMEM;
	}

	mutex_unlock(&perf->metrics_lock);

	return 0;
}

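/*
 * Tear down the config created by alloc_empty_config(): find it by UUID,
 * unlink it from the metrics IDR and drop the allocation reference.
 */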
static void
destroy_empty_config(struct i915_perf *perf)
{
	struct i915_oa_config *oa_config = NULL, *tmp;
	int id;

	mutex_lock(&perf->metrics_lock);

	idr_for_each_entry(&perf->metrics_idr, tmp, id) {
		if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
			oa_config = tmp;
			break;
		}
	}

	if (oa_config)
		idr_remove(&perf->metrics_idr, oa_config->id);

	mutex_unlock(&perf->metrics_lock);

	if (oa_config)
		i915_oa_config_put(oa_config);
}

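/*
 * Look up the test config by UUID and return it with an extra reference,
 * or NULL if it has not been registered.
 */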
static struct i915_oa_config *
get_empty_config(struct i915_perf *perf)
{
	struct i915_oa_config *oa_config = NULL, *tmp;
	int id;

	mutex_lock(&perf->metrics_lock);

	idr_for_each_entry(&perf->metrics_idr, tmp, id) {
		if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
			oa_config = i915_oa_config_get(tmp);
			break;
		}
	}

	mutex_unlock(&perf->metrics_lock);

	return oa_config;
}

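/*
 * Open a minimal OA stream on the render engine using the empty test
 * config, picking an OA report format appropriate for the platform.
 * Returns NULL on any failure so callers can simply fail the test.
 */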
static struct i915_perf_stream *
test_stream(struct i915_perf *perf)
{
	struct drm_i915_perf_open_param param = {};
	struct i915_oa_config *oa_config = get_empty_config(perf);
	struct perf_open_properties props = {
		.engine = intel_engine_lookup_user(perf->i915,
						   I915_ENGINE_CLASS_RENDER,
						   0),
		.sample_flags = SAMPLE_OA_REPORT,
		.oa_format = GRAPHICS_VER(perf->i915) == 12 ?
		I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8,
	};
	struct i915_perf_stream *stream;
	struct intel_gt *gt;

	/* Also drop the config reference if the engine lookup failed */
	if (!props.engine || !oa_config) {
		i915_oa_config_put(oa_config);
		return NULL;
	}

	gt = props.engine->gt;

	props.metrics_set = oa_config->id;

	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
	if (!stream) {
		i915_oa_config_put(oa_config);
		return NULL;
	}

	stream->perf = perf;

	mutex_lock(&gt->perf.lock);
	if (i915_oa_stream_init(stream, &param, &props)) {
		kfree(stream);
		stream = NULL;
	}
	mutex_unlock(&gt->perf.lock);

	i915_oa_config_put(oa_config);

	return stream;
}

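/* i915_perf_destroy_locked() requires the GT perf lock to be held. */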
static void stream_destroy(struct i915_perf_stream *stream)
{
	struct intel_gt *gt = stream->engine->gt;

	mutex_lock(&gt->perf.lock);
	i915_perf_destroy_locked(stream);
	mutex_unlock(&gt->perf.lock);
}

static int live_sanitycheck(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;

	/* Quick check we can create a perf stream */

	stream = test_stream(&i915->perf);
	if (!stream)
		return -EINVAL;

	stream_destroy(stream);
	return 0;
}

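/*
 * Emit a PIPE_CONTROL that writes the GPU timestamp into the given dword
 * slot of the engine's status page (STORE_DATA_INDEX makes the address an
 * offset into the per-engine HWSP).
 */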
static int write_timestamp(struct i915_request *rq, int slot)
{
	u32 *cs;
	int len;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Gen8+ PIPE_CONTROL carries a 64b address, one extra dword */
	len = 5;
	if (GRAPHICS_VER(rq->i915) >= 8)
		len++;

	*cs++ = GFX_OP_PIPE_CONTROL(len);
	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
		PIPE_CONTROL_STORE_DATA_INDEX |
		PIPE_CONTROL_WRITE_TIMESTAMP;
	*cs++ = slot * sizeof(u32);
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	return 0;
}

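/*
 * Busy-wait until the timestamp lands in the status page (or the request
 * completes), then sample the CPU clock for comparison.
 */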
static ktime_t poll_status(struct i915_request *rq, int slot)
{
	while (!intel_read_status_page(rq->engine, slot) &&
	       !i915_request_completed(rq))
		cpu_relax();

	return ktime_get();
}

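/*
 * The stream's noa_wait batch is programmed to spin for
 * noa_programming_delay ns. Bracket its execution with GPU timestamp
 * writes (and CPU-side polling) to check that the delay is honoured.
 */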
static int live_noa_delay(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;
	struct i915_request *rq;
	ktime_t t0, t1;
	u64 expected;
	u32 delay;
	int err;
	int i;

	/* Check that the GPU delay matches expectations */

	stream = test_stream(&i915->perf);
	if (!stream)
		return -ENOMEM;

	expected = atomic64_read(&stream->perf->noa_programming_delay);

	if (stream->engine->class != RENDER_CLASS) {
		err = -ENODEV;
		goto out;
	}

	for (i = 0; i < 4; i++)
		intel_write_status_page(stream->engine, 0x100 + i, 0);

	rq = intel_engine_create_kernel_request(stream->engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out;
	}

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
	}

	err = write_timestamp(rq, 0x100);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	err = rq->engine->emit_bb_start(rq,
					i915_ggtt_offset(stream->noa_wait), 0,
					I915_DISPATCH_SECURE);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	err = write_timestamp(rq, 0x102);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	preempt_disable();
	t0 = poll_status(rq, 0x100);
	t1 = poll_status(rq, 0x102);
	preempt_enable();

	pr_info("CPU delay: %lluns, expected %lluns\n",
		ktime_sub(t1, t0), expected);

	delay = intel_read_status_page(stream->engine, 0x102);
	delay -= intel_read_status_page(stream->engine, 0x100);
	delay = intel_gt_clock_interval_to_ns(stream->engine->gt, delay);
	pr_info("GPU delay: %uns, expected %lluns\n",
		delay, expected);

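	/* Tolerate a GPU delay within [3/4, 3/2] of the expected value */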
	if (4 * delay < 3 * expected || 2 * delay > 3 * expected) {
		pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
		       delay / 1000,
		       div_u64(3 * expected, 4000),
		       div_u64(3 * expected, 2000));
		err = -EINVAL;
	}

	i915_request_put(rq);
out:
	stream_destroy(stream);
	return err;
}

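/*
 * The noa_wait batch uses the CS general purpose registers for its own
 * bookkeeping, so it must save and restore them. Fill the GPRs with a
 * sentinel, run the delay, then check nothing leaked into either the
 * registers or the user context's scratch page.
 */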
static int live_noa_gpr(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *cs, *store;
	void *scratch;
	u32 gpr0;
	int err;
	int i;

	/* Check that the delay does not clobber user context state (GPR) */

	stream = test_stream(&i915->perf);
	if (!stream)
		return -ENOMEM;

	gpr0 = i915_mmio_reg_offset(GEN8_RING_CS_GPR(stream->engine->mmio_base, 0));

	ce = intel_context_create(stream->engine);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out;
	}

	/* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */
	scratch = __px_vaddr(ce->vm->scratch[0]);
	memset(scratch, POISON_FREE, PAGE_SIZE);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_ce;
	}
	i915_request_get(rq);

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err) {
			i915_request_add(rq);
			goto out_rq;
		}
	}

	/* Fill the 16 qword [32 dword] GPR with a known unlikely value */
	cs = intel_ring_begin(rq, 2 * 32 + 2);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		i915_request_add(rq);
		goto out_rq;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(32);
	for (i = 0; i < 32; i++) {
		*cs++ = gpr0 + i * sizeof(u32);
		*cs++ = STACK_MAGIC;
	}
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	/* Execute the GPU delay */
	err = rq->engine->emit_bb_start(rq,
					i915_ggtt_offset(stream->noa_wait), 0,
					I915_DISPATCH_SECURE);
	if (err) {
		i915_request_add(rq);
		goto out_rq;
	}

	/* Read the GPR back, using the pinned global HWSP for convenience */
	store = memset32(rq->engine->status_page.addr + 512, 0, 32);
	for (i = 0; i < 32; i++) {
		u32 cmd;

		cs = intel_ring_begin(rq, 4);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(rq);
			goto out_rq;
		}

		cmd = MI_STORE_REGISTER_MEM;
		if (GRAPHICS_VER(i915) >= 8)
			cmd++; /* Gen8+ SRM uses a 64b address, one extra dword */
		cmd |= MI_USE_GGTT;

		*cs++ = cmd;
		*cs++ = gpr0 + i * sizeof(u32);
		*cs++ = i915_ggtt_offset(rq->engine->status_page.vma) +
			offset_in_page(store) +
			i * sizeof(u32);
		*cs++ = 0;
		intel_ring_advance(rq, cs);
	}

	i915_request_add(rq);

	if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, HZ / 2) < 0) {
		pr_err("noa_wait timed out\n");
		intel_gt_set_wedged(stream->engine->gt);
		err = -EIO;
		goto out_rq;
	}

	/* Verify that the GPRs contain our expected values */
	for (i = 0; i < 32; i++) {
		if (store[i] == STACK_MAGIC)
			continue;

		pr_err("GPR[%d] lost, found:%08x, expected:%08x!\n",
		       i, store[i], STACK_MAGIC);
		err = -EINVAL;
	}

	/* Verify that the user's scratch page was not used for GPR storage */
	if (memchr_inv(scratch, POISON_FREE, PAGE_SIZE)) {
		pr_err("Scratch page overwritten!\n");
		igt_hexdump(scratch, 4096);
		err = -EINVAL;
	}

out_rq:
	i915_request_put(rq);
out_ce:
	intel_context_put(ce);
out:
	stream_destroy(stream);
	return err;
}

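/*
 * Entry point for the live perf selftests; skipped when i915 perf is not
 * initialised on this platform or the GT is already wedged.
 */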
int i915_perf_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_sanitycheck),
		SUBTEST(live_noa_delay),
		SUBTEST(live_noa_gpr),
	};
	struct i915_perf *perf = &i915->perf;
	int err;

	if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
		return 0;

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	err = alloc_empty_config(&i915->perf);
	if (err)
		return err;

	err = i915_live_subtests(tests, i915);

	destroy_empty_config(&i915->perf);

	return err;
}