/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */
6 | |
7 | #include <linux/kref.h> |
8 | |
9 | #include "gem/i915_gem_pm.h" |
10 | #include "gt/intel_gt.h" |
11 | |
12 | #include "i915_selftest.h" |
13 | |
14 | #include "igt_flush_test.h" |
15 | #include "lib_sw_fence.h" |
16 | |
17 | #define TEST_OA_CONFIG_UUID "12345678-1234-1234-1234-1234567890ab" |
18 | |
19 | static int |
20 | alloc_empty_config(struct i915_perf *perf) |
21 | { |
22 | struct i915_oa_config *oa_config; |
23 | |
24 | oa_config = kzalloc(size: sizeof(*oa_config), GFP_KERNEL); |
25 | if (!oa_config) |
26 | return -ENOMEM; |
27 | |
28 | oa_config->perf = perf; |
29 | kref_init(kref: &oa_config->ref); |
30 | |
31 | strscpy(oa_config->uuid, TEST_OA_CONFIG_UUID, sizeof(oa_config->uuid)); |
32 | |
33 | mutex_lock(&perf->metrics_lock); |
34 | |
35 | oa_config->id = idr_alloc(&perf->metrics_idr, ptr: oa_config, start: 2, end: 0, GFP_KERNEL); |
36 | if (oa_config->id < 0) { |
37 | mutex_unlock(lock: &perf->metrics_lock); |
38 | i915_oa_config_put(oa_config); |
39 | return -ENOMEM; |
40 | } |
41 | |
42 | mutex_unlock(lock: &perf->metrics_lock); |
43 | |
44 | return 0; |
45 | } |
46 | |
47 | static void |
48 | destroy_empty_config(struct i915_perf *perf) |
49 | { |
50 | struct i915_oa_config *oa_config = NULL, *tmp; |
51 | int id; |
52 | |
53 | mutex_lock(&perf->metrics_lock); |
54 | |
55 | idr_for_each_entry(&perf->metrics_idr, tmp, id) { |
56 | if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) { |
57 | oa_config = tmp; |
58 | break; |
59 | } |
60 | } |
61 | |
62 | if (oa_config) |
63 | idr_remove(&perf->metrics_idr, id: oa_config->id); |
64 | |
65 | mutex_unlock(lock: &perf->metrics_lock); |
66 | |
67 | if (oa_config) |
68 | i915_oa_config_put(oa_config); |
69 | } |
70 | |
71 | static struct i915_oa_config * |
72 | get_empty_config(struct i915_perf *perf) |
73 | { |
74 | struct i915_oa_config *oa_config = NULL, *tmp; |
75 | int id; |
76 | |
77 | mutex_lock(&perf->metrics_lock); |
78 | |
79 | idr_for_each_entry(&perf->metrics_idr, tmp, id) { |
80 | if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) { |
81 | oa_config = i915_oa_config_get(oa_config: tmp); |
82 | break; |
83 | } |
84 | } |
85 | |
86 | mutex_unlock(lock: &perf->metrics_lock); |
87 | |
88 | return oa_config; |
89 | } |
90 | |
91 | static struct i915_perf_stream * |
92 | test_stream(struct i915_perf *perf) |
93 | { |
94 | struct drm_i915_perf_open_param param = {}; |
95 | struct i915_oa_config *oa_config = get_empty_config(perf); |
96 | struct perf_open_properties props = { |
97 | .engine = intel_engine_lookup_user(i915: perf->i915, |
98 | class: I915_ENGINE_CLASS_RENDER, |
99 | instance: 0), |
100 | .sample_flags = SAMPLE_OA_REPORT, |
101 | .oa_format = GRAPHICS_VER(perf->i915) == 12 ? |
102 | I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8, |
103 | }; |
104 | struct i915_perf_stream *stream; |
105 | struct intel_gt *gt; |
106 | |
107 | if (!props.engine) |
108 | return NULL; |
109 | |
110 | gt = props.engine->gt; |
111 | |
112 | if (!oa_config) |
113 | return NULL; |
114 | |
115 | props.metrics_set = oa_config->id; |
116 | |
117 | stream = kzalloc(size: sizeof(*stream), GFP_KERNEL); |
118 | if (!stream) { |
119 | i915_oa_config_put(oa_config); |
120 | return NULL; |
121 | } |
122 | |
123 | stream->perf = perf; |
124 | |
125 | mutex_lock(>->perf.lock); |
126 | if (i915_oa_stream_init(stream, param: ¶m, props: &props)) { |
127 | kfree(objp: stream); |
128 | stream = NULL; |
129 | } |
130 | mutex_unlock(lock: >->perf.lock); |
131 | |
132 | i915_oa_config_put(oa_config); |
133 | |
134 | return stream; |
135 | } |
136 | |
137 | static void stream_destroy(struct i915_perf_stream *stream) |
138 | { |
139 | struct intel_gt *gt = stream->engine->gt; |
140 | |
141 | mutex_lock(>->perf.lock); |
142 | i915_perf_destroy_locked(stream); |
143 | mutex_unlock(lock: >->perf.lock); |
144 | } |
145 | |
146 | static int live_sanitycheck(void *arg) |
147 | { |
148 | struct drm_i915_private *i915 = arg; |
149 | struct i915_perf_stream *stream; |
150 | |
151 | /* Quick check we can create a perf stream */ |
152 | |
153 | stream = test_stream(perf: &i915->perf); |
154 | if (!stream) |
155 | return -EINVAL; |
156 | |
157 | stream_destroy(stream); |
158 | return 0; |
159 | } |
160 | |
161 | static int write_timestamp(struct i915_request *rq, int slot) |
162 | { |
163 | u32 *cs; |
164 | int len; |
165 | |
166 | cs = intel_ring_begin(rq, num_dwords: 6); |
167 | if (IS_ERR(ptr: cs)) |
168 | return PTR_ERR(ptr: cs); |
169 | |
170 | len = 5; |
171 | if (GRAPHICS_VER(rq->i915) >= 8) |
172 | len++; |
173 | |
174 | *cs++ = GFX_OP_PIPE_CONTROL(len); |
175 | *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | |
176 | PIPE_CONTROL_STORE_DATA_INDEX | |
177 | PIPE_CONTROL_WRITE_TIMESTAMP; |
178 | *cs++ = slot * sizeof(u32); |
179 | *cs++ = 0; |
180 | *cs++ = 0; |
181 | *cs++ = 0; |
182 | |
183 | intel_ring_advance(rq, cs); |
184 | |
185 | return 0; |
186 | } |
187 | |
188 | static ktime_t poll_status(struct i915_request *rq, int slot) |
189 | { |
190 | while (!intel_read_status_page(engine: rq->engine, reg: slot) && |
191 | !i915_request_completed(rq)) |
192 | cpu_relax(); |
193 | |
194 | return ktime_get(); |
195 | } |
196 | |
197 | static int live_noa_delay(void *arg) |
198 | { |
199 | struct drm_i915_private *i915 = arg; |
200 | struct i915_perf_stream *stream; |
201 | struct i915_request *rq; |
202 | ktime_t t0, t1; |
203 | u64 expected; |
204 | u32 delay; |
205 | int err; |
206 | int i; |
207 | |
208 | /* Check that the GPU delays matches expectations */ |
209 | |
210 | stream = test_stream(perf: &i915->perf); |
211 | if (!stream) |
212 | return -ENOMEM; |
213 | |
214 | expected = atomic64_read(v: &stream->perf->noa_programming_delay); |
215 | |
216 | if (stream->engine->class != RENDER_CLASS) { |
217 | err = -ENODEV; |
218 | goto out; |
219 | } |
220 | |
221 | for (i = 0; i < 4; i++) |
222 | intel_write_status_page(engine: stream->engine, reg: 0x100 + i, value: 0); |
223 | |
224 | rq = intel_engine_create_kernel_request(engine: stream->engine); |
225 | if (IS_ERR(ptr: rq)) { |
226 | err = PTR_ERR(ptr: rq); |
227 | goto out; |
228 | } |
229 | |
230 | if (rq->engine->emit_init_breadcrumb) { |
231 | err = rq->engine->emit_init_breadcrumb(rq); |
232 | if (err) { |
233 | i915_request_add(rq); |
234 | goto out; |
235 | } |
236 | } |
237 | |
238 | err = write_timestamp(rq, slot: 0x100); |
239 | if (err) { |
240 | i915_request_add(rq); |
241 | goto out; |
242 | } |
243 | |
244 | err = rq->engine->emit_bb_start(rq, |
245 | i915_ggtt_offset(vma: stream->noa_wait), 0, |
246 | I915_DISPATCH_SECURE); |
247 | if (err) { |
248 | i915_request_add(rq); |
249 | goto out; |
250 | } |
251 | |
252 | err = write_timestamp(rq, slot: 0x102); |
253 | if (err) { |
254 | i915_request_add(rq); |
255 | goto out; |
256 | } |
257 | |
258 | i915_request_get(rq); |
259 | i915_request_add(rq); |
260 | |
261 | preempt_disable(); |
262 | t0 = poll_status(rq, slot: 0x100); |
263 | t1 = poll_status(rq, slot: 0x102); |
264 | preempt_enable(); |
265 | |
266 | pr_info("CPU delay: %lluns, expected %lluns\n" , |
267 | ktime_sub(t1, t0), expected); |
268 | |
269 | delay = intel_read_status_page(engine: stream->engine, reg: 0x102); |
270 | delay -= intel_read_status_page(engine: stream->engine, reg: 0x100); |
271 | delay = intel_gt_clock_interval_to_ns(gt: stream->engine->gt, count: delay); |
272 | pr_info("GPU delay: %uns, expected %lluns\n" , |
273 | delay, expected); |
274 | |
275 | if (4 * delay < 3 * expected || 2 * delay > 3 * expected) { |
276 | pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n" , |
277 | delay / 1000, |
278 | div_u64(3 * expected, 4000), |
279 | div_u64(3 * expected, 2000)); |
280 | err = -EINVAL; |
281 | } |
282 | |
283 | i915_request_put(rq); |
284 | out: |
285 | stream_destroy(stream); |
286 | return err; |
287 | } |
288 | |
289 | static int live_noa_gpr(void *arg) |
290 | { |
291 | struct drm_i915_private *i915 = arg; |
292 | struct i915_perf_stream *stream; |
293 | struct intel_context *ce; |
294 | struct i915_request *rq; |
295 | u32 *cs, *store; |
296 | void *scratch; |
297 | u32 gpr0; |
298 | int err; |
299 | int i; |
300 | |
301 | /* Check that the delay does not clobber user context state (GPR) */ |
302 | |
303 | stream = test_stream(perf: &i915->perf); |
304 | if (!stream) |
305 | return -ENOMEM; |
306 | |
307 | gpr0 = i915_mmio_reg_offset(GEN8_RING_CS_GPR(stream->engine->mmio_base, 0)); |
308 | |
309 | ce = intel_context_create(engine: stream->engine); |
310 | if (IS_ERR(ptr: ce)) { |
311 | err = PTR_ERR(ptr: ce); |
312 | goto out; |
313 | } |
314 | |
315 | /* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */ |
316 | scratch = __px_vaddr(p: ce->vm->scratch[0]); |
317 | memset(scratch, POISON_FREE, PAGE_SIZE); |
318 | |
319 | rq = intel_context_create_request(ce); |
320 | if (IS_ERR(ptr: rq)) { |
321 | err = PTR_ERR(ptr: rq); |
322 | goto out_ce; |
323 | } |
324 | i915_request_get(rq); |
325 | |
326 | if (rq->engine->emit_init_breadcrumb) { |
327 | err = rq->engine->emit_init_breadcrumb(rq); |
328 | if (err) { |
329 | i915_request_add(rq); |
330 | goto out_rq; |
331 | } |
332 | } |
333 | |
334 | /* Fill the 16 qword [32 dword] GPR with a known unlikely value */ |
335 | cs = intel_ring_begin(rq, num_dwords: 2 * 32 + 2); |
336 | if (IS_ERR(ptr: cs)) { |
337 | err = PTR_ERR(ptr: cs); |
338 | i915_request_add(rq); |
339 | goto out_rq; |
340 | } |
341 | |
342 | *cs++ = MI_LOAD_REGISTER_IMM(32); |
343 | for (i = 0; i < 32; i++) { |
344 | *cs++ = gpr0 + i * sizeof(u32); |
345 | *cs++ = STACK_MAGIC; |
346 | } |
347 | *cs++ = MI_NOOP; |
348 | intel_ring_advance(rq, cs); |
349 | |
350 | /* Execute the GPU delay */ |
351 | err = rq->engine->emit_bb_start(rq, |
352 | i915_ggtt_offset(vma: stream->noa_wait), 0, |
353 | I915_DISPATCH_SECURE); |
354 | if (err) { |
355 | i915_request_add(rq); |
356 | goto out_rq; |
357 | } |
358 | |
359 | /* Read the GPR back, using the pinned global HWSP for convenience */ |
360 | store = memset32(s: rq->engine->status_page.addr + 512, v: 0, n: 32); |
361 | for (i = 0; i < 32; i++) { |
362 | u32 cmd; |
363 | |
364 | cs = intel_ring_begin(rq, num_dwords: 4); |
365 | if (IS_ERR(ptr: cs)) { |
366 | err = PTR_ERR(ptr: cs); |
367 | i915_request_add(rq); |
368 | goto out_rq; |
369 | } |
370 | |
371 | cmd = MI_STORE_REGISTER_MEM; |
372 | if (GRAPHICS_VER(i915) >= 8) |
373 | cmd++; |
374 | cmd |= MI_USE_GGTT; |
375 | |
376 | *cs++ = cmd; |
377 | *cs++ = gpr0 + i * sizeof(u32); |
378 | *cs++ = i915_ggtt_offset(vma: rq->engine->status_page.vma) + |
379 | offset_in_page(store) + |
380 | i * sizeof(u32); |
381 | *cs++ = 0; |
382 | intel_ring_advance(rq, cs); |
383 | } |
384 | |
385 | i915_request_add(rq); |
386 | |
387 | if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, HZ / 2) < 0) { |
388 | pr_err("noa_wait timed out\n" ); |
389 | intel_gt_set_wedged(gt: stream->engine->gt); |
390 | err = -EIO; |
391 | goto out_rq; |
392 | } |
393 | |
394 | /* Verify that the GPR contain our expected values */ |
395 | for (i = 0; i < 32; i++) { |
396 | if (store[i] == STACK_MAGIC) |
397 | continue; |
398 | |
399 | pr_err("GPR[%d] lost, found:%08x, expected:%08x!\n" , |
400 | i, store[i], STACK_MAGIC); |
401 | err = -EINVAL; |
402 | } |
403 | |
404 | /* Verify that the user's scratch page was not used for GPR storage */ |
405 | if (memchr_inv(p: scratch, POISON_FREE, PAGE_SIZE)) { |
406 | pr_err("Scratch page overwritten!\n" ); |
407 | igt_hexdump(buf: scratch, len: 4096); |
408 | err = -EINVAL; |
409 | } |
410 | |
411 | out_rq: |
412 | i915_request_put(rq); |
413 | out_ce: |
414 | intel_context_put(ce); |
415 | out: |
416 | stream_destroy(stream); |
417 | return err; |
418 | } |
419 | |
420 | int i915_perf_live_selftests(struct drm_i915_private *i915) |
421 | { |
422 | static const struct i915_subtest tests[] = { |
423 | SUBTEST(live_sanitycheck), |
424 | SUBTEST(live_noa_delay), |
425 | SUBTEST(live_noa_gpr), |
426 | }; |
427 | struct i915_perf *perf = &i915->perf; |
428 | int err; |
429 | |
430 | if (!perf->metrics_kobj || !perf->ops.enable_metric_set) |
431 | return 0; |
432 | |
433 | if (intel_gt_is_wedged(gt: to_gt(i915))) |
434 | return 0; |
435 | |
436 | err = alloc_empty_config(perf: &i915->perf); |
437 | if (err) |
438 | return err; |
439 | |
440 | err = i915_live_subtests(tests, i915); |
441 | |
442 | destroy_empty_config(perf: &i915->perf); |
443 | |
444 | return err; |
445 | } |
446 | |