1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright © 2018 Intel Corporation |
4 | */ |
5 | |
6 | #include <linux/sort.h> |
7 | |
8 | #include "intel_gpu_commands.h" |
9 | #include "intel_gt_pm.h" |
10 | #include "intel_rps.h" |
11 | |
12 | #include "i915_selftest.h" |
13 | #include "selftests/igt_flush_test.h" |
14 | |
15 | #define COUNT 5 |
16 | |
17 | static int cmp_u32(const void *A, const void *B) |
18 | { |
19 | const u32 *a = A, *b = B; |
20 | |
21 | return *a - *b; |
22 | } |
23 | |
24 | static intel_wakeref_t perf_begin(struct intel_gt *gt) |
25 | { |
26 | intel_wakeref_t wakeref = intel_gt_pm_get(gt); |
27 | |
28 | /* Boost gpufreq to max [waitboost] and keep it fixed */ |
29 | atomic_inc(v: >->rps.num_waiters); |
30 | queue_work(wq: gt->i915->unordered_wq, work: >->rps.work); |
31 | flush_work(work: >->rps.work); |
32 | |
33 | return wakeref; |
34 | } |
35 | |
36 | static int perf_end(struct intel_gt *gt, intel_wakeref_t wakeref) |
37 | { |
38 | atomic_dec(v: >->rps.num_waiters); |
39 | intel_gt_pm_put(gt, handle: wakeref); |
40 | |
41 | return igt_flush_test(i915: gt->i915); |
42 | } |
43 | |
44 | static i915_reg_t timestamp_reg(struct intel_engine_cs *engine) |
45 | { |
46 | struct drm_i915_private *i915 = engine->i915; |
47 | |
48 | if (GRAPHICS_VER(i915) == 5 || IS_G4X(i915)) |
49 | return RING_TIMESTAMP_UDW(engine->mmio_base); |
50 | else |
51 | return RING_TIMESTAMP(engine->mmio_base); |
52 | } |
53 | |
54 | static int write_timestamp(struct i915_request *rq, int slot) |
55 | { |
56 | struct intel_timeline *tl = |
57 | rcu_dereference_protected(rq->timeline, |
58 | !i915_request_signaled(rq)); |
59 | u32 cmd; |
60 | u32 *cs; |
61 | |
62 | cs = intel_ring_begin(rq, num_dwords: 4); |
63 | if (IS_ERR(ptr: cs)) |
64 | return PTR_ERR(ptr: cs); |
65 | |
66 | cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT; |
67 | if (GRAPHICS_VER(rq->i915) >= 8) |
68 | cmd++; |
69 | *cs++ = cmd; |
70 | *cs++ = i915_mmio_reg_offset(timestamp_reg(rq->engine)); |
71 | *cs++ = tl->hwsp_offset + slot * sizeof(u32); |
72 | *cs++ = 0; |
73 | |
74 | intel_ring_advance(rq, cs); |
75 | |
76 | return 0; |
77 | } |
78 | |
79 | static struct i915_vma *create_empty_batch(struct intel_context *ce) |
80 | { |
81 | struct drm_i915_gem_object *obj; |
82 | struct i915_vma *vma; |
83 | u32 *cs; |
84 | int err; |
85 | |
86 | obj = i915_gem_object_create_internal(i915: ce->engine->i915, PAGE_SIZE); |
87 | if (IS_ERR(ptr: obj)) |
88 | return ERR_CAST(ptr: obj); |
89 | |
90 | cs = i915_gem_object_pin_map_unlocked(obj, type: I915_MAP_WB); |
91 | if (IS_ERR(ptr: cs)) { |
92 | err = PTR_ERR(ptr: cs); |
93 | goto err_put; |
94 | } |
95 | |
96 | cs[0] = MI_BATCH_BUFFER_END; |
97 | |
98 | i915_gem_object_flush_map(obj); |
99 | |
100 | vma = i915_vma_instance(obj, vm: ce->vm, NULL); |
101 | if (IS_ERR(ptr: vma)) { |
102 | err = PTR_ERR(ptr: vma); |
103 | goto err_unpin; |
104 | } |
105 | |
106 | err = i915_vma_pin(vma, size: 0, alignment: 0, PIN_USER); |
107 | if (err) |
108 | goto err_unpin; |
109 | |
110 | i915_gem_object_unpin_map(obj); |
111 | return vma; |
112 | |
113 | err_unpin: |
114 | i915_gem_object_unpin_map(obj); |
115 | err_put: |
116 | i915_gem_object_put(obj); |
117 | return ERR_PTR(error: err); |
118 | } |
119 | |
120 | static u32 trifilter(u32 *a) |
121 | { |
122 | u64 sum; |
123 | |
124 | sort(base: a, COUNT, size: sizeof(*a), cmp_func: cmp_u32, NULL); |
125 | |
126 | sum = mul_u32_u32(a: a[2], b: 2); |
127 | sum += a[1]; |
128 | sum += a[3]; |
129 | |
130 | return sum >> 2; |
131 | } |
132 | |
133 | static int perf_mi_bb_start(void *arg) |
134 | { |
135 | struct intel_gt *gt = arg; |
136 | struct intel_engine_cs *engine; |
137 | enum intel_engine_id id; |
138 | intel_wakeref_t wakeref; |
139 | int err = 0; |
140 | |
141 | if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */ |
142 | return 0; |
143 | |
144 | wakeref = perf_begin(gt); |
145 | for_each_engine(engine, gt, id) { |
146 | struct intel_context *ce = engine->kernel_context; |
147 | struct i915_vma *batch; |
148 | u32 cycles[COUNT]; |
149 | int i; |
150 | |
151 | if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0) |
152 | continue; |
153 | |
154 | intel_engine_pm_get(engine); |
155 | |
156 | batch = create_empty_batch(ce); |
157 | if (IS_ERR(ptr: batch)) { |
158 | err = PTR_ERR(ptr: batch); |
159 | intel_engine_pm_put(engine); |
160 | break; |
161 | } |
162 | |
163 | err = i915_vma_sync(vma: batch); |
164 | if (err) { |
165 | intel_engine_pm_put(engine); |
166 | i915_vma_put(vma: batch); |
167 | break; |
168 | } |
169 | |
170 | for (i = 0; i < ARRAY_SIZE(cycles); i++) { |
171 | struct i915_request *rq; |
172 | |
173 | rq = i915_request_create(ce); |
174 | if (IS_ERR(ptr: rq)) { |
175 | err = PTR_ERR(ptr: rq); |
176 | break; |
177 | } |
178 | |
179 | err = write_timestamp(rq, slot: 2); |
180 | if (err) |
181 | goto out; |
182 | |
183 | err = rq->engine->emit_bb_start(rq, |
184 | i915_vma_offset(vma: batch), 8, |
185 | 0); |
186 | if (err) |
187 | goto out; |
188 | |
189 | err = write_timestamp(rq, slot: 3); |
190 | if (err) |
191 | goto out; |
192 | |
193 | out: |
194 | i915_request_get(rq); |
195 | i915_request_add(rq); |
196 | |
197 | if (i915_request_wait(rq, flags: 0, HZ / 5) < 0) |
198 | err = -EIO; |
199 | i915_request_put(rq); |
200 | if (err) |
201 | break; |
202 | |
203 | cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2]; |
204 | } |
205 | i915_vma_put(vma: batch); |
206 | intel_engine_pm_put(engine); |
207 | if (err) |
208 | break; |
209 | |
210 | pr_info("%s: MI_BB_START cycles: %u\n" , |
211 | engine->name, trifilter(cycles)); |
212 | } |
213 | if (perf_end(gt, wakeref)) |
214 | err = -EIO; |
215 | |
216 | return err; |
217 | } |
218 | |
219 | static struct i915_vma *create_nop_batch(struct intel_context *ce) |
220 | { |
221 | struct drm_i915_gem_object *obj; |
222 | struct i915_vma *vma; |
223 | u32 *cs; |
224 | int err; |
225 | |
226 | obj = i915_gem_object_create_internal(i915: ce->engine->i915, SZ_64K); |
227 | if (IS_ERR(ptr: obj)) |
228 | return ERR_CAST(ptr: obj); |
229 | |
230 | cs = i915_gem_object_pin_map_unlocked(obj, type: I915_MAP_WB); |
231 | if (IS_ERR(ptr: cs)) { |
232 | err = PTR_ERR(ptr: cs); |
233 | goto err_put; |
234 | } |
235 | |
236 | memset(cs, 0, SZ_64K); |
237 | cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END; |
238 | |
239 | i915_gem_object_flush_map(obj); |
240 | |
241 | vma = i915_vma_instance(obj, vm: ce->vm, NULL); |
242 | if (IS_ERR(ptr: vma)) { |
243 | err = PTR_ERR(ptr: vma); |
244 | goto err_unpin; |
245 | } |
246 | |
247 | err = i915_vma_pin(vma, size: 0, alignment: 0, PIN_USER); |
248 | if (err) |
249 | goto err_unpin; |
250 | |
251 | i915_gem_object_unpin_map(obj); |
252 | return vma; |
253 | |
254 | err_unpin: |
255 | i915_gem_object_unpin_map(obj); |
256 | err_put: |
257 | i915_gem_object_put(obj); |
258 | return ERR_PTR(error: err); |
259 | } |
260 | |
261 | static int perf_mi_noop(void *arg) |
262 | { |
263 | struct intel_gt *gt = arg; |
264 | struct intel_engine_cs *engine; |
265 | enum intel_engine_id id; |
266 | intel_wakeref_t wakeref; |
267 | int err = 0; |
268 | |
269 | if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */ |
270 | return 0; |
271 | |
272 | wakeref = perf_begin(gt); |
273 | for_each_engine(engine, gt, id) { |
274 | struct intel_context *ce = engine->kernel_context; |
275 | struct i915_vma *base, *nop; |
276 | u32 cycles[COUNT]; |
277 | int i; |
278 | |
279 | if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0) |
280 | continue; |
281 | |
282 | intel_engine_pm_get(engine); |
283 | |
284 | base = create_empty_batch(ce); |
285 | if (IS_ERR(ptr: base)) { |
286 | err = PTR_ERR(ptr: base); |
287 | intel_engine_pm_put(engine); |
288 | break; |
289 | } |
290 | |
291 | err = i915_vma_sync(vma: base); |
292 | if (err) { |
293 | i915_vma_put(vma: base); |
294 | intel_engine_pm_put(engine); |
295 | break; |
296 | } |
297 | |
298 | nop = create_nop_batch(ce); |
299 | if (IS_ERR(ptr: nop)) { |
300 | err = PTR_ERR(ptr: nop); |
301 | i915_vma_put(vma: base); |
302 | intel_engine_pm_put(engine); |
303 | break; |
304 | } |
305 | |
306 | err = i915_vma_sync(vma: nop); |
307 | if (err) { |
308 | i915_vma_put(vma: nop); |
309 | i915_vma_put(vma: base); |
310 | intel_engine_pm_put(engine); |
311 | break; |
312 | } |
313 | |
314 | for (i = 0; i < ARRAY_SIZE(cycles); i++) { |
315 | struct i915_request *rq; |
316 | |
317 | rq = i915_request_create(ce); |
318 | if (IS_ERR(ptr: rq)) { |
319 | err = PTR_ERR(ptr: rq); |
320 | break; |
321 | } |
322 | |
323 | err = write_timestamp(rq, slot: 2); |
324 | if (err) |
325 | goto out; |
326 | |
327 | err = rq->engine->emit_bb_start(rq, |
328 | i915_vma_offset(vma: base), 8, |
329 | 0); |
330 | if (err) |
331 | goto out; |
332 | |
333 | err = write_timestamp(rq, slot: 3); |
334 | if (err) |
335 | goto out; |
336 | |
337 | err = rq->engine->emit_bb_start(rq, |
338 | i915_vma_offset(vma: nop), |
339 | i915_vma_size(vma: nop), |
340 | 0); |
341 | if (err) |
342 | goto out; |
343 | |
344 | err = write_timestamp(rq, slot: 4); |
345 | if (err) |
346 | goto out; |
347 | |
348 | out: |
349 | i915_request_get(rq); |
350 | i915_request_add(rq); |
351 | |
352 | if (i915_request_wait(rq, flags: 0, HZ / 5) < 0) |
353 | err = -EIO; |
354 | i915_request_put(rq); |
355 | if (err) |
356 | break; |
357 | |
358 | cycles[i] = |
359 | (rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) - |
360 | (rq->hwsp_seqno[3] - rq->hwsp_seqno[2]); |
361 | } |
362 | i915_vma_put(vma: nop); |
363 | i915_vma_put(vma: base); |
364 | intel_engine_pm_put(engine); |
365 | if (err) |
366 | break; |
367 | |
368 | pr_info("%s: 16K MI_NOOP cycles: %u\n" , |
369 | engine->name, trifilter(cycles)); |
370 | } |
371 | if (perf_end(gt, wakeref)) |
372 | err = -EIO; |
373 | |
374 | return err; |
375 | } |
376 | |
377 | int intel_engine_cs_perf_selftests(struct drm_i915_private *i915) |
378 | { |
379 | static const struct i915_subtest tests[] = { |
380 | SUBTEST(perf_mi_bb_start), |
381 | SUBTEST(perf_mi_noop), |
382 | }; |
383 | |
384 | if (intel_gt_is_wedged(gt: to_gt(i915))) |
385 | return 0; |
386 | |
387 | return intel_gt_live_subtests(tests, to_gt(i915)); |
388 | } |
389 | |
390 | static int intel_mmio_bases_check(void *arg) |
391 | { |
392 | int i, j; |
393 | |
394 | for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { |
395 | const struct engine_info *info = &intel_engines[i]; |
396 | u8 prev = U8_MAX; |
397 | |
398 | for (j = 0; j < MAX_MMIO_BASES; j++) { |
399 | u8 ver = info->mmio_bases[j].graphics_ver; |
400 | u32 base = info->mmio_bases[j].base; |
401 | |
402 | if (ver >= prev) { |
403 | pr_err("%s(%s, class:%d, instance:%d): mmio base for graphics ver %u is before the one for ver %u\n" , |
404 | __func__, |
405 | intel_engine_class_repr(info->class), |
406 | info->class, info->instance, |
407 | prev, ver); |
408 | return -EINVAL; |
409 | } |
410 | |
411 | if (ver == 0) |
412 | break; |
413 | |
414 | if (!base) { |
415 | pr_err("%s(%s, class:%d, instance:%d): invalid mmio base (%x) for graphics ver %u at entry %u\n" , |
416 | __func__, |
417 | intel_engine_class_repr(info->class), |
418 | info->class, info->instance, |
419 | base, ver, j); |
420 | return -EINVAL; |
421 | } |
422 | |
423 | prev = ver; |
424 | } |
425 | |
426 | pr_debug("%s: min graphics version supported for %s%d is %u\n" , |
427 | __func__, |
428 | intel_engine_class_repr(info->class), |
429 | info->instance, |
430 | prev); |
431 | } |
432 | |
433 | return 0; |
434 | } |
435 | |
436 | int intel_engine_cs_mock_selftests(void) |
437 | { |
438 | static const struct i915_subtest tests[] = { |
439 | SUBTEST(intel_mmio_bases_check), |
440 | }; |
441 | |
442 | return i915_subtests(tests, NULL); |
443 | } |
444 | |