// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/sort.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"

#include "selftests/igt_spinner.h"
#include "selftests/i915_random.h"

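/*
 * Transfer sizes exercised by the copy/clear tests: single pages, 64K and
 * 2M page sizes, sizes just below, at and just above CHUNK_SZ to cover the
 * chunked emission paths, plus a large multi-chunk transfer.
 */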
static const unsigned int sizes[] = {
	SZ_4K,
	SZ_64K,
	SZ_2M,
	CHUNK_SZ - SZ_4K,
	CHUNK_SZ,
	CHUNK_SZ + SZ_4K,
	SZ_64M,
};

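/*
 * Prefer a local-memory (lmem) object so the lmem paths are exercised on
 * hardware that has it; fall back to an internal (system memory) object
 * everywhere else.
 */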
static struct drm_i915_gem_object *
create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_lmem(i915, size, 0);
	if (!IS_ERR(obj))
		return obj;

	return i915_gem_object_create_internal(i915, size);
}

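/*
 * Fill the source with an ascending u32 pattern and the destination with
 * its complement, run the supplied copy function under a ww transaction,
 * then sample one random u32 per page of the destination to verify the
 * copy landed. A missing source object (e.g. no lmem) is treated as a skip.
 */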
static int copy(struct intel_migrate *migrate,
		int (*fn)(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out),
		u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *src, *dst;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	src = create_lmem_or_internal(i915, sz);
	if (IS_ERR(src))
		return 0;

	sz = src->base.size;
	dst = i915_gem_object_create_internal(i915, sz);
	if (IS_ERR(dst))
		goto err_free_src;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(src, &ww);
		if (err)
			continue;

		err = i915_gem_object_lock(dst, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = i;
		i915_gem_object_flush_map(src);

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto unpin_src;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(dst);

		err = fn(migrate, &ww, src, dst, &rq);
		if (!err)
			continue;

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(dst);
unpin_src:
		i915_gem_object_unpin_map(src);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != x) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(dst);
	i915_gem_object_unpin_map(src);

err_out:
	i915_gem_object_put(dst);
err_free_src:
	i915_gem_object_put(src);

	return err;
}

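/*
 * Copy between an object's main surface and its flat-CCS metadata, one
 * CHUNK_SZ window at a time: each iteration emits the PTEs for the next
 * chunk and then a CCS blit in the requested direction (write_to_ccs
 * selects main -> CCS, otherwise CCS -> main). The last request is
 * returned via @out.
 */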
static int intel_context_copy_ccs(struct intel_context *ce,
				  const struct i915_deps *deps,
				  struct scatterlist *sg,
				  unsigned int pat_index,
				  bool write_to_ccs,
				  struct i915_request **out)
{
	u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS;
	u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS;
	struct sgt_dma it = sg_sgt(sg);
	struct i915_request *rq;
	u32 offset;
	int err;

	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
	*out = NULL;

	GEM_BUG_ON(ce->ring->size < SZ_64K);

	offset = 0;
	if (HAS_64K_PAGES(ce->engine->i915))
		offset = CHUNK_SZ;

	do {
		int len;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ce;
		}

		if (deps) {
			err = i915_request_await_deps(rq, deps);
			if (err)
				goto out_rq;

			if (rq->engine->emit_init_breadcrumb) {
				err = rq->engine->emit_init_breadcrumb(rq);
				if (err)
					goto out_rq;
			}

			deps = NULL;
		}

		/* The PTE updates + clear must not be interrupted. */
		err = emit_no_arbitration(rq);
		if (err)
			goto out_rq;

		len = emit_pte(rq, &it, pat_index, true, offset, CHUNK_SZ);
		if (len <= 0) {
			err = len;
			goto out_rq;
		}

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
		if (err)
			goto out_rq;

		err = emit_copy_ccs(rq, offset, dst_access,
				    offset, src_access, len);
		if (err)
			goto out_rq;

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);

		/* Arbitration is re-enabled between requests. */
out_rq:
		if (*out)
			i915_request_put(*out);
		*out = i915_request_get(rq);
		i915_request_add(rq);
		if (err || !it.sg || !sg_dma_len(it.sg))
			break;

		cond_resched();
	} while (1);

out_ce:
	return err;
}

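/*
 * Pin a migration context (a fresh one if available, otherwise the GT-wide
 * one) under the caller's ww transaction and run the CCS copy on it.
 */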
static int
intel_migrate_ccs_copy(struct intel_migrate *m,
		       struct i915_gem_ww_ctx *ww,
		       const struct i915_deps *deps,
		       struct scatterlist *sg,
		       unsigned int pat_index,
		       bool write_to_ccs,
		       struct i915_request **out)
{
	struct intel_context *ce;
	int err;

	*out = NULL;
	if (!m->context)
		return -ENODEV;

	ce = intel_migrate_create_context(m);
	if (IS_ERR(ce))
		ce = intel_context_get(m->context);
	GEM_BUG_ON(IS_ERR(ce));

	err = intel_context_pin_ww(ce, ww);
	if (err)
		goto out;

	err = intel_context_copy_ccs(ce, deps, sg, pat_index,
				     write_to_ccs, out);

	intel_context_unpin(ce);
out:
	intel_context_put(ce);
	return err;
}

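/*
 * Fill the object with a known pattern, run the supplied clear function and
 * sample one random u32 per page to check the value stuck. When clearing to
 * zero on flat-CCS local memory, the CCS metadata is first dirtied and
 * afterwards read back to verify it was cleared as well.
 */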
static int clear(struct intel_migrate *migrate,
		 int (*fn)(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out),
		 u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr, val = 0;
	bool ccs_cap = false;
	int err = 0;
	int i;

	obj = create_lmem_or_internal(i915, sz);
	if (IS_ERR(obj))
		return 0;

	/* Consider the rounded up memory too */
	sz = obj->base.size;

	if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj))
		ccs_cap = true;

	for_i915_gem_ww(&ww, err, true) {
		int ccs_bytes, ccs_bytes_per_chunk;

		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(obj);

		if (ccs_cap && !val) {
			/* Write the obj data into ccs surface */
			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
						     obj->mm.pages->sgl,
						     obj->pat_index,
						     true, &rq);
			if (rq && !err) {
				if (i915_request_wait(rq, 0, HZ) < 0) {
					pr_err("%ps timed out, size: %u\n",
					       fn, sz);
					err = -ETIME;
				}
				i915_request_put(rq);
				rq = NULL;
			}
			if (err)
				continue;
		}

		err = fn(migrate, &ww, obj, val, &rq);
		if (rq && !err) {
			if (i915_request_wait(rq, 0, HZ) < 0) {
				pr_err("%ps timed out, size: %u\n", fn, sz);
				err = -ETIME;
			}
			i915_request_put(rq);
			rq = NULL;
		}
		if (err)
			continue;

		i915_gem_object_flush_map(obj);

		/* Verify the set/clear of the obj mem */
		for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
			int x = i * 1024 +
				i915_prandom_u32_max_state(1024, prng);

			if (vaddr[x] != val) {
				pr_err("%ps failed, (%u != %u), offset: %zu\n",
				       fn, vaddr[x], val, x * sizeof(u32));
				igt_hexdump(vaddr + i * 1024, 4096);
				err = -EINVAL;
			}
		}
		if (err)
			continue;

		if (ccs_cap && !val) {
			for (i = 0; i < sz / sizeof(u32); i++)
				vaddr[i] = ~i;
			i915_gem_object_flush_map(obj);

			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
						     obj->mm.pages->sgl,
						     obj->pat_index,
						     false, &rq);
			if (rq && !err) {
				if (i915_request_wait(rq, 0, HZ) < 0) {
					pr_err("%ps timed out, size: %u\n",
					       fn, sz);
					err = -ETIME;
				}
				i915_request_put(rq);
				rq = NULL;
			}
			if (err)
				continue;

			ccs_bytes = GET_CCS_BYTES(i915, sz);
			ccs_bytes_per_chunk = GET_CCS_BYTES(i915, CHUNK_SZ);
			i915_gem_object_flush_map(obj);

			for (i = 0; !err && i < DIV_ROUND_UP(ccs_bytes, PAGE_SIZE); i++) {
				int offset = ((i * PAGE_SIZE) /
					ccs_bytes_per_chunk) * CHUNK_SZ / sizeof(u32);
				int ccs_bytes_left = (ccs_bytes - i * PAGE_SIZE) / sizeof(u32);
				int x = i915_prandom_u32_max_state(min_t(int, 1024,
									 ccs_bytes_left), prng);

				if (vaddr[offset + x]) {
					pr_err("%ps ccs clearing failed, offset: %ld/%d\n",
					       fn, i * PAGE_SIZE + x * sizeof(u32), ccs_bytes);
					igt_hexdump(vaddr + offset,
						    min_t(int, 4096,
							  ccs_bytes_left * sizeof(u32)));
					err = -EINVAL;
				}
			}

			if (err)
				continue;
		}
		i915_gem_object_unpin_map(obj);
	}

	if (err) {
		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq && err != -EINVAL) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}

		i915_gem_object_unpin_map(obj);
	}

	i915_gem_object_put(obj);
	return err;
}

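/*
 * The __migrate_* wrappers exercise the ww-aware intel_migrate_* entry
 * points, while the __global_* wrappers drive the GT-wide migration
 * context directly via intel_context_migrate_*.
 */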
static int __migrate_copy(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out)
{
	return intel_migrate_copy(migrate, ww, NULL,
				  src->mm.pages->sgl, src->pat_index,
				  i915_gem_object_is_lmem(src),
				  dst->mm.pages->sgl, dst->pat_index,
				  i915_gem_object_is_lmem(dst),
				  out);
}

static int __global_copy(struct intel_migrate *migrate,
			 struct i915_gem_ww_ctx *ww,
			 struct drm_i915_gem_object *src,
			 struct drm_i915_gem_object *dst,
			 struct i915_request **out)
{
	return intel_context_migrate_copy(migrate->context, NULL,
					  src->mm.pages->sgl, src->pat_index,
					  i915_gem_object_is_lmem(src),
					  dst->mm.pages->sgl, dst->pat_index,
					  i915_gem_object_is_lmem(dst),
					  out);
}

static int
migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __migrate_copy, sz, prng);
}

static int
global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __global_copy, sz, prng);
}

static int __migrate_clear(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out)
{
	return intel_migrate_clear(migrate, ww, NULL,
				   obj->mm.pages->sgl,
				   obj->pat_index,
				   i915_gem_object_is_lmem(obj),
				   value, out);
}

static int __global_clear(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *obj,
			  u32 value,
			  struct i915_request **out)
{
	return intel_context_migrate_clear(migrate->context, NULL,
					   obj->mm.pages->sgl,
					   obj->pat_index,
					   i915_gem_object_is_lmem(obj),
					   value, out);
}

static int
migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __migrate_clear, sz, prng);
}

static int
global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __global_clear, sz, prng);
}

static int live_migrate_copy(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_copy(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_copy(migrate, sizes[i], &prng);
		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

static int live_migrate_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_clear(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_clear(migrate, sizes[i], &prng);

		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

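/*
 * A spinner paired with a timer: the timer callback ends the spinner after
 * a delay, releasing whatever has queued up behind it on the ring.
 */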
struct spinner_timer {
	struct timer_list timer;
	struct igt_spinner spin;
};

static void spinner_kill(struct timer_list *timer)
{
	struct spinner_timer *st = from_timer(st, timer, timer);

	igt_spinner_end(&st->spin);
	pr_info("%s\n", __func__);
}

static int live_emit_pte_full_ring(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct intel_context *ce;
	struct i915_request *rq, *prev;
	struct spinner_timer st;
	struct sgt_dma it;
	int len, sz, err;
	u32 *cs;

	/*
	 * Simple regression test to check that we don't trample the
	 * rq->reserved_space when returning from emit_pte(), if the ring is
	 * nearly full.
	 */

	if (igt_spinner_init(&st.spin, to_gt(i915)))
		return -ENOMEM;

	obj = i915_gem_object_create_internal(i915, 2 * PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_spinner;
	}

	err = i915_gem_object_pin_pages_unlocked(obj);
	if (err)
		goto out_obj;

	ce = intel_migrate_create_context(migrate);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out_obj;
	}

	ce->ring_size = SZ_4K; /* Not too big */

	err = intel_context_pin(ce);
	if (err)
		goto out_put;

	rq = igt_spinner_create_request(&st.spin, ce, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_unpin;
	}

	i915_request_add(rq);
	if (!igt_wait_for_spinner(&st.spin, rq)) {
		err = -EIO;
		goto out_unpin;
	}

	/*
	 * Fill the rest of the ring leaving I915_EMIT_PTE_NUM_DWORDS +
	 * ring->reserved_space at the end. To actually emit the PTEs we require
	 * slightly more than I915_EMIT_PTE_NUM_DWORDS, since our object size is
	 * greater than PAGE_SIZE. The correct behaviour is to wait for more
	 * ring space in emit_pte(), otherwise we trample on the reserved_space
	 * resulting in crashes when later submitting the rq.
	 */

	prev = NULL;
	do {
		if (prev)
			i915_request_add(rq);

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_unpin;
		}

		sz = (rq->ring->space - rq->reserved_space) / sizeof(u32) -
			I915_EMIT_PTE_NUM_DWORDS;
		sz = min_t(u32, sz, (SZ_1K - rq->reserved_space) / sizeof(u32) -
			   I915_EMIT_PTE_NUM_DWORDS);
		cs = intel_ring_begin(rq, sz);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			goto out_rq;
		}

		memset32(cs, MI_NOOP, sz);
		cs += sz;
		intel_ring_advance(rq, cs);

		pr_info("%s emit=%u sz=%d\n", __func__, rq->ring->emit, sz);

		prev = rq;
	} while (rq->ring->space > (rq->reserved_space +
				    I915_EMIT_PTE_NUM_DWORDS * sizeof(u32)));

	timer_setup_on_stack(&st.timer, spinner_kill, 0);
	mod_timer(&st.timer, jiffies + 2 * HZ);

	/*
	 * This should wait for the spinner to be killed, otherwise we should go
	 * down in flames when doing i915_request_add().
	 */
	pr_info("%s emit_pte ring space=%u\n", __func__, rq->ring->space);
	it = sg_sgt(obj->mm.pages->sgl);
	len = emit_pte(rq, &it, obj->pat_index, false, 0, CHUNK_SZ);
	if (!len) {
		err = -EINVAL;
		goto out_rq;
	}
	if (len < 0) {
		err = len;
		goto out_rq;
	}

out_rq:
	i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? */
	del_timer_sync(&st.timer);
	destroy_timer_on_stack(&st.timer);
out_unpin:
	intel_context_unpin(ce);
out_put:
	intel_context_put(ce);
out_obj:
	i915_gem_object_put(obj);
out_spinner:
	igt_spinner_fini(&st.spin);
	return err;
}

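/*
 * Run @fn concurrently on num_online_cpus() + 1 kthreads, each with its own
 * PRNG state, to stress concurrent users of the migration context. The
 * threads are stopped (and their exit status collected) after a short sleep.
 */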
struct threaded_migrate {
	struct intel_migrate *migrate;
	struct task_struct *tsk;
	struct rnd_state prng;
};

static int threaded_migrate(struct intel_migrate *migrate,
			    int (*fn)(void *arg),
			    unsigned int flags)
{
	const unsigned int n_cpus = num_online_cpus() + 1;
	struct threaded_migrate *thread;
	I915_RND_STATE(prng);
	unsigned int i;
	int err = 0;

	thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL);
	if (!thread)
		return 0;

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk;

		thread[i].migrate = migrate;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk = kthread_run(fn, &thread[i], "igt-%d", i);
		if (IS_ERR(tsk)) {
			err = PTR_ERR(tsk);
			break;
		}

		get_task_struct(tsk);
		thread[i].tsk = tsk;
	}

	msleep(10 * n_cpus); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk = thread[i].tsk;
		int status;

		if (IS_ERR_OR_NULL(tsk))
			continue;

		status = kthread_stop_put(tsk);
		if (status && !err)
			err = status;
	}

	kfree(thread);
	return err;
}

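/*
 * Each thread copies or clears a 2 * CHUNK_SZ object so that every run
 * spans multiple chunks of the migration window.
 */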
static int __thread_migrate_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_copy(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_migrate_copy, 0);
}

static int __thread_global_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_global_copy(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_global_copy, 0);
}

static int __thread_migrate_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int __thread_global_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_migrate_clear, 0);
}

static int thread_global_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_global_clear, 0);
}

int intel_migrate_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_migrate_copy),
		SUBTEST(live_migrate_clear),
		SUBTEST(live_emit_pte_full_ring),
		SUBTEST(thread_migrate_copy),
		SUBTEST(thread_migrate_clear),
		SUBTEST(thread_global_copy),
		SUBTEST(thread_global_clear),
	};
	struct intel_gt *gt = to_gt(i915);

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}

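/*
 * Allocate an object (lmem if requested and available, otherwise internal),
 * then lock it and pin its pages so the perf tests can feed the sg table
 * straight into the migration routines.
 */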
static struct drm_i915_gem_object *
create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
{
	struct drm_i915_gem_object *obj = NULL;
	int err;

	if (try_lmem)
		obj = i915_gem_object_create_lmem(gt->i915, sz, 0);

	if (IS_ERR_OR_NULL(obj)) {
		obj = i915_gem_object_create_internal(gt->i915, sz);
		if (IS_ERR(obj))
			return obj;
	}

	i915_gem_object_trylock(obj, NULL);
	err = i915_gem_object_pin_pages(obj);
	if (err) {
		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return obj;
}

static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

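/*
 * Time five blitter passes over the buffer, sort the samples and report the
 * throughput computed from the middle samples (the fastest and slowest are
 * dropped) to smooth out one-off outliers.
 */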
static int __perf_clear_blt(struct intel_context *ce,
			    struct scatterlist *sg,
			    unsigned int pat_index,
			    bool is_lmem,
			    size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_clear(ce, NULL, sg, pat_index,
						  is_lmem, 0, &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB fill: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_clear_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *dst;
		int err;

		dst = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(dst))
			return PTR_ERR(dst);

		err = __perf_clear_blt(gt->migrate.context,
				       dst->mm.pages->sgl,
				       i915_gem_get_pat_index(gt->i915,
							      I915_CACHE_NONE),
				       i915_gem_object_is_lmem(dst),
				       sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
		if (err)
			return err;
	}

	return 0;
}

static int __perf_copy_blt(struct intel_context *ce,
			   struct scatterlist *src,
			   unsigned int src_pat_index,
			   bool src_is_lmem,
			   struct scatterlist *dst,
			   unsigned int dst_pat_index,
			   bool dst_is_lmem,
			   size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_copy(ce, NULL,
						 src, src_pat_index,
						 src_is_lmem,
						 dst, dst_pat_index,
						 dst_is_lmem,
						 &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB copy: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_copy_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		size_t sz;
		int err;

		src = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(src))
			return PTR_ERR(src);

		sz = src->base.size;
		dst = create_init_lmem_internal(gt, sz, false);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(gt->migrate.context,
				      src->mm.pages->sgl,
				      i915_gem_get_pat_index(gt->i915,
							     I915_CACHE_NONE),
				      i915_gem_object_is_lmem(src),
				      dst->mm.pages->sgl,
				      i915_gem_get_pat_index(gt->i915,
							     I915_CACHE_NONE),
				      i915_gem_object_is_lmem(dst),
				      sz);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
err_src:
		i915_gem_object_unlock(src);
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

int intel_migrate_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_clear_blt),
		SUBTEST(perf_copy_blt),
	};
	struct intel_gt *gt = to_gt(i915);

	if (intel_gt_is_wedged(gt))
		return 0;

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}