/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gt/intel_engine_pm.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_ring.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"

struct context {
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
};

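/*
 * CPU access helpers: write/read a dword through a kernel mapping of the
 * object's backing page, flushing cachelines around the access as
 * instructed by the prepare_write/prepare_read helpers.
 */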
static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
{
	unsigned int needs_clflush;
	struct page *page;
	u32 *cpu;
	int err;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush);
	if (err)
		goto out;

	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
	cpu = kmap_local_page(page) + offset_in_page(offset);

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	*cpu = v;

	if (needs_clflush & CLFLUSH_AFTER)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	kunmap_local(cpu);
	i915_gem_object_finish_access(ctx->obj);

out:
	i915_gem_object_unlock(ctx->obj);
	return err;
}

static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
{
	unsigned int needs_clflush;
	struct page *page;
	u32 *cpu;
	int err;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush);
	if (err)
		goto out;

	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
	cpu = kmap_local_page(page) + offset_in_page(offset);

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	*v = *cpu;

	kunmap_local(cpu);
	i915_gem_object_finish_access(ctx->obj);

out:
	i915_gem_object_unlock(ctx->obj);
	return err;
}

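/*
 * GTT access helpers: move the object to the GTT domain, pin it into a
 * mappable GGTT slot and access the dword through the CPU's aperture
 * mapping, holding a GT pm wakeref around the MMIO access.
 */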
static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
{
	intel_wakeref_t wakeref;
	struct i915_vma *vma;
	u32 __iomem *map;
	int err = 0;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	wakeref = intel_gt_pm_get(vma->vm->gt);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto out_rpm;
	}

	iowrite32(v, &map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

out_rpm:
	intel_gt_pm_put(vma->vm->gt, wakeref);
	return err;
}

static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
{
	intel_wakeref_t wakeref;
	struct i915_vma *vma;
	u32 __iomem *map;
	int err = 0;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	wakeref = intel_gt_pm_get(vma->vm->gt);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto out_rpm;
	}

	*v = ioread32(&map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

out_rpm:
	intel_gt_pm_put(vma->vm->gt, wakeref);
	return err;
}

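/*
 * WC access helpers: move the object to the WC domain and access the dword
 * through a write-combining CPU vmap of the object's pages, flushing the
 * mapping after the write.
 */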
static int wc_set(struct context *ctx, unsigned long offset, u32 v)
{
	u32 *map;
	int err;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	map = i915_gem_object_pin_map_unlocked(ctx->obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	map[offset / sizeof(*map)] = v;

	__i915_gem_object_flush_map(ctx->obj, offset, sizeof(*map));
	i915_gem_object_unpin_map(ctx->obj);

	return 0;
}

static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
{
	u32 *map;
	int err;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	map = i915_gem_object_pin_map_unlocked(ctx->obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	*v = map[offset / sizeof(*map)];
	i915_gem_object_unpin_map(ctx->obj);

	return 0;
}

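/*
 * GPU access helper: emit a MI_STORE_DWORD_IMM on the kernel context to
 * write the dword into the object at its GGTT address, e.g. on gen8+:
 *
 *	MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT
 *	lower_32_bits(address)
 *	upper_32_bits(address)
 *	value
 *
 * There is no gpu_get counterpart; the GPU path is write-only and readback
 * is performed via one of the CPU paths, which wait for the request as
 * part of their domain management.
 */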
static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
{
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
	if (err)
		goto out_unlock;

	rq = intel_engine_create_kernel_request(ctx->engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_unpin;
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto out_rq;
	}

	if (GRAPHICS_VER(ctx->engine->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = v;
	} else if (GRAPHICS_VER(ctx->engine->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
		*cs++ = MI_NOOP;
	}
	intel_ring_advance(rq, cs);

	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);

out_rq:
	i915_request_add(rq);
out_unpin:
	i915_vma_unpin(vma);
out_unlock:
	i915_gem_object_unlock(ctx->obj);

	return err;
}

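/*
 * Validity checks used to skip access modes this device cannot exercise:
 * the GTT paths are gated on the presence of fence registers (a proxy for
 * a usable mappable aperture), and the GPU path on an engine that can
 * emit MI_STORE_DWORD_IMM. Neither is usable on a wedged GT.
 */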
static bool always_valid(struct context *ctx)
{
	return true;
}

static bool needs_fence_registers(struct context *ctx)
{
	struct intel_gt *gt = ctx->engine->gt;

	if (intel_gt_is_wedged(gt))
		return false;

	return gt->ggtt->num_fences;
}

static bool needs_mi_store_dword(struct context *ctx)
{
	if (intel_gt_is_wedged(ctx->engine->gt))
		return false;

	return intel_engine_can_store_dword(ctx->engine);
}

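/*
 * The table of access modes: each entry couples a set/get pair with a
 * predicate deciding whether the mode is usable on this device. The "gpu"
 * mode has no getter, so it only participates as a writer/overwriter.
 */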
static const struct igt_coherency_mode {
	const char *name;
	int (*set)(struct context *ctx, unsigned long offset, u32 v);
	int (*get)(struct context *ctx, unsigned long offset, u32 *v);
	bool (*valid)(struct context *ctx);
} igt_coherency_mode[] = {
	{ "cpu", cpu_set, cpu_get, always_valid },
	{ "gtt", gtt_set, gtt_get, needs_fence_registers },
	{ "wc", wc_set, wc_get, always_valid },
	{ "gpu", gpu_set, NULL, needs_mi_store_dword },
	{ },
};

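/*
 * Pick one uabi engine at random: count the engines, draw an index from
 * the selftest PRNG, then walk the list again until the index hits zero.
 */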
static struct intel_engine_cs *
random_engine(struct drm_i915_private *i915, struct rnd_state *prng)
{
	struct intel_engine_cs *engine;
	unsigned int count;

	count = 0;
	for_each_uabi_engine(engine, i915)
		count++;

	count = i915_prandom_u32_max_state(count, prng);
	for_each_uabi_engine(engine, i915)
		if (count-- == 0)
			return engine;

	return NULL;
}

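/*
 * The main loop: for every (overwrite, write, read) triple of valid access
 * modes, scatter stale (inverted) values across a set of cachelines,
 * overwrite them with fresh values, then check that every read observes
 * the fresh value. offsets[] and values[] share a single allocation of
 * 2 * ncachelines dwords.
 */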
static int igt_gem_coherency(void *arg)
{
	const unsigned int ncachelines = PAGE_SIZE / 64;
	struct drm_i915_private *i915 = arg;
	const struct igt_coherency_mode *read, *write, *over;
	unsigned long count, n;
	u32 *offsets, *values;
	I915_RND_STATE(prng);
	struct context ctx;
	int err = 0;

	/*
	 * We repeatedly write, overwrite and read from a sequence of
	 * cachelines in order to try and detect incoherency (unflushed writes
	 * from either the CPU or GPU). Each setter/getter uses our cache
	 * domain API which should prevent incoherency.
	 */

	offsets = kmalloc_array(ncachelines, 2 * sizeof(u32), GFP_KERNEL);
	if (!offsets)
		return -ENOMEM;
	for (count = 0; count < ncachelines; count++)
		offsets[count] = count * 64 + 4 * (count % 16);

	values = offsets + ncachelines;

	ctx.engine = random_engine(i915, &prng);
	if (!ctx.engine) {
		err = -ENODEV;
		goto out_free;
	}
	pr_info("%s: using %s\n", __func__, ctx.engine->name);
	intel_engine_pm_get(ctx.engine);

	for (over = igt_coherency_mode; over->name; over++) {
		if (!over->set)
			continue;

		if (!over->valid(&ctx))
			continue;

		for (write = igt_coherency_mode; write->name; write++) {
			if (!write->set)
				continue;

			if (!write->valid(&ctx))
				continue;

			for (read = igt_coherency_mode; read->name; read++) {
				if (!read->get)
					continue;

				if (!read->valid(&ctx))
					continue;

				for_each_prime_number_from(count, 1, ncachelines) {
					ctx.obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
					if (IS_ERR(ctx.obj)) {
						err = PTR_ERR(ctx.obj);
						goto out_pm;
					}

					i915_random_reorder(offsets, ncachelines, &prng);
					for (n = 0; n < count; n++)
						values[n] = prandom_u32_state(&prng);

					for (n = 0; n < count; n++) {
						err = over->set(&ctx, offsets[n], ~values[n]);
						if (err) {
							pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, over->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						err = write->set(&ctx, offsets[n], values[n]);
						if (err) {
							pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, write->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						u32 found;

						err = read->get(&ctx, offsets[n], &found);
						if (err) {
							pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, read->name, err);
							goto put_object;
						}

						if (found != values[n]) {
							pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n",
							       n, count, over->name,
							       write->name, values[n],
							       read->name, found,
							       ~values[n], offsets[n]);
							err = -EINVAL;
							goto put_object;
						}
					}

					i915_gem_object_put(ctx.obj);
				}
			}
		}
	}
out_pm:
	intel_engine_pm_put(ctx.engine);
out_free:
	kfree(offsets);
	return err;

put_object:
	i915_gem_object_put(ctx.obj);
	goto out_pm;
}

int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_gem_coherency),
	};

	return i915_live_subtests(tests, i915);
}