// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "i915_selftest.h"

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"

#include "gen8_engine_cs.h"
#include "i915_gem_ww.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_context.h"
#include "intel_gt.h"
#include "intel_ring.h"

#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"

static void vma_set_qw(struct i915_vma *vma, u64 addr, u64 val)
{
	GEM_BUG_ON(addr < i915_vma_offset(vma));
	GEM_BUG_ON(addr >= i915_vma_offset(vma) + i915_vma_size(vma) + sizeof(val));
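	/* Write the value through the object's CPU mapping at the offset matching the GPU address */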
	memset64(page_mask_bits(vma->obj->mm.mapping) +
		 (addr - i915_vma_offset(vma)), val, 1);
}

static int
pte_tlbinv(struct intel_context *ce,
	   struct i915_vma *va,
	   struct i915_vma *vb,
	   u64 align,
	   void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length),
	   u64 length,
	   struct rnd_state *prng)
{
	const unsigned int pat_index =
		i915_gem_get_pat_index(ce->vm->i915, I915_CACHE_NONE);
	struct drm_i915_gem_object *batch;
	struct drm_mm_node vb_node;
	struct i915_request *rq;
	struct i915_vma *vma;
	u64 addr;
	int err;
	u32 *cs;

	batch = i915_gem_object_create_internal(ce->vm->i915, 4096);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	vma = i915_vma_instance(batch, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto out;

	/* Pin va at a random but aligned offset after vma */
	addr = round_up(vma->node.start + vma->node.size, align);
	/* MI_CONDITIONAL_BATCH_BUFFER_END limits the address to 48b */
	addr = igt_random_offset(prng, addr, min(ce->vm->total, BIT_ULL(48)),
				 va->size, align);
	err = i915_vma_pin(va, 0, 0, addr | PIN_OFFSET_FIXED | PIN_USER);
	if (err) {
		pr_err("Cannot pin at %llx+%llx\n", addr, va->size);
		goto out;
	}
	GEM_BUG_ON(i915_vma_offset(va) != addr);
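	/*
	 * When testing with a second vma, alias vb over the drm_mm node that
	 * va already occupies, so that binding vb later rewrites the very
	 * same PTEs the spinner is reading through.
	 */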
	if (vb != va) {
		vb_node = vb->node;
		vb->node = va->node; /* overwrites the _same_ PTE */
	}

	/*
	 * Now choose a random dword within the 1st pinned page.
	 *
	 * SZ_64K pages on dg1 require that the whole PT be marked as
	 * containing 64KiB entries. So we make sure that the vma covers
	 * the whole PT, despite being randomly aligned to 64KiB, and
	 * restrict our sampling to the 2MiB PT within which we know we
	 * will be using 64KiB pages.
	 */
	if (align == SZ_64K)
		addr = round_up(addr, SZ_2M);
	addr = igt_random_offset(prng, addr, addr + align, 8, 8);

	if (va != vb)
		pr_info("%s(%s): Sampling %llx, with alignment %llx, using PTE size %x (phys %x, sg %x), invalidate:%llx+%llx\n",
			ce->engine->name, va->obj->mm.region->name ?: "smem",
			addr, align, va->resource->page_sizes_gtt,
			va->page_sizes.phys, va->page_sizes.sg,
			addr & -length, length);

	cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto out_va;
	}
	*cs++ = MI_NOOP; /* for later termination */
	/*
	 * Sample the target to see if we spot the updated backing store.
	 * Gen8 VCS compares the immediate value with the bitwise AND of two
	 * consecutive DWORDs at addr; other gens/engines compare the value
	 * with the single DWORD at addr. Moreover we want to exercise
	 * DWORD-sized invalidations. The values below were chosen to satisfy
	 * all of these requirements.
	 */
	*cs++ = MI_CONDITIONAL_BATCH_BUFFER_END | MI_DO_COMPARE | 2;
	*cs++ = 0; /* break if *addr == 0 */
	*cs++ = lower_32_bits(addr);
	*cs++ = upper_32_bits(addr);
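	/*
	 * va reads back as all-ones, so the conditional batch keeps looping;
	 * vb reads back as zero, so sampling it terminates the batch.
	 */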
	vma_set_qw(va, addr, -1);
	vma_set_qw(vb, addr, 0);

	/* Keep sampling until we get bored */
	*cs++ = MI_BATCH_BUFFER_START | BIT(8) | 1;
	*cs++ = lower_32_bits(i915_vma_offset(vma));
	*cs++ = upper_32_bits(i915_vma_offset(vma));

	i915_gem_object_flush_map(batch);

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_va;
	}

	err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), 0, 0);
	if (err) {
		i915_request_add(rq);
		goto out_va;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	/*
	 * Short sleep to sanitycheck the batch is spinning before we begin.
	 * FIXME: Why is GSC so slow?
	 */
	if (ce->engine->class == OTHER_CLASS)
		msleep(200);
	else
		msleep(10);

	if (va == vb) {
		if (!i915_request_completed(rq)) {
			pr_err("%s(%s): Semaphore sanitycheck failed %llx, with alignment %llx, using PTE size %x (phys %x, sg %x)\n",
			       ce->engine->name, va->obj->mm.region->name ?: "smem",
			       addr, align, va->resource->page_sizes_gtt,
			       va->page_sizes.phys, va->page_sizes.sg);
			err = -EIO;
		}
	} else if (!i915_request_completed(rq)) {
		struct i915_vma_resource vb_res = {
			.bi.pages = vb->obj->mm.pages,
			.bi.page_sizes = vb->obj->mm.page_sizes,
			.start = i915_vma_offset(vb),
			.vma_size = i915_vma_size(vb)
		};
		unsigned int pte_flags = 0;

		/* Flip the PTE between A and B */
		if (i915_gem_object_is_lmem(vb->obj))
			pte_flags |= PTE_LM;
		ce->vm->insert_entries(ce->vm, &vb_res, pat_index, pte_flags);

		/* Flush the PTE update to concurrent HW */
		tlbinv(ce->vm, addr & -length, length);

		if (wait_for(i915_request_completed(rq), HZ / 2)) {
			pr_err("%s: Request did not complete; the COND_BBE did not read the updated PTE\n",
			       ce->engine->name);
			err = -EINVAL;
		}
	} else {
		pr_err("Spinner ended unexpectedly\n");
		err = -EIO;
	}
	i915_request_put(rq);

	cs = page_mask_bits(batch->mm.mapping);
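	/* Replace the leading MI_NOOP with a terminator so the loop exits */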
	*cs = MI_BATCH_BUFFER_END;
	wmb();

out_va:
	if (vb != va)
		vb->node = vb_node;
	i915_vma_unpin(va);
	if (i915_vma_unbind_unlocked(va))
		err = -EIO;
out:
	i915_gem_object_put(batch);
	return err;
}

static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
{
	struct intel_memory_region *mr = gt->i915->mm.regions[INTEL_REGION_LMEM_0];
	resource_size_t size = SZ_1G;

	/*
	 * Allocating the largest possible page size allows us to test all
	 * types of pages. To succeed with both allocations, especially in
	 * the case of Small BAR, try to allocate no more than a quarter of
	 * mappable memory.
	 */
	if (mr && size > resource_size(&mr->io) / 4)
		size = resource_size(&mr->io) / 4;

	return i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CONTIGUOUS);
}

static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)
{
	/*
	 * SZ_64K pages require covering the whole 2M PT (gen8 to tgl/dg1).
	 * While that does not require the whole 2M block to be contiguous,
	 * it is easier to make it so, since we need that for SZ_2M pages.
	 * Since we randomly offset the start of the vma, we need a 4M object
	 * so that there is a 2M range within it that is suitable for SZ_64K PTEs.
	 */
	return i915_gem_object_create_internal(gt->i915, SZ_4M);
}

static int
mem_tlbinv(struct intel_gt *gt,
	   struct drm_i915_gem_object *(*create_fn)(struct intel_gt *),
	   void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length))
{
	unsigned int ppgtt_size = RUNTIME_INFO(gt->i915)->ppgtt_size;
	struct intel_engine_cs *engine;
	struct drm_i915_gem_object *A, *B;
	struct i915_ppgtt *ppgtt;
	struct i915_vma *va, *vb;
	enum intel_engine_id id;
	I915_RND_STATE(prng);
	void *vaddr;
	int err;

	/*
	 * Check that the TLB invalidate is able to revoke an active
	 * page. We load a page into a spinning COND_BBE loop and then
	 * remap that page to a new physical address. The old address is
	 * retained in the TLB cache (and so the loop keeps spinning) until
	 * we issue an invalidate.
	 */

	A = create_fn(gt);
	if (IS_ERR(A))
		return PTR_ERR(A);

	vaddr = i915_gem_object_pin_map_unlocked(A, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_a;
	}

	B = create_fn(gt);
	if (IS_ERR(B)) {
		err = PTR_ERR(B);
		goto out_a;
	}

	vaddr = i915_gem_object_pin_map_unlocked(B, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_b;
	}

	GEM_BUG_ON(A->base.size != B->base.size);
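	/*
	 * Ideally both objects are backed by a single contiguous allocation
	 * so that every PTE size up to the object size can be exercised;
	 * warn if the region could not provide that.
	 */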
	if ((A->mm.page_sizes.phys | B->mm.page_sizes.phys) & (A->base.size - 1))
		pr_warn("Failed to allocate contiguous pages for size %zx\n",
			A->base.size);

	ppgtt = i915_ppgtt_create(gt, 0);
	if (IS_ERR(ppgtt)) {
		err = PTR_ERR(ppgtt);
		goto out_b;
	}

	va = i915_vma_instance(A, &ppgtt->vm, NULL);
	if (IS_ERR(va)) {
		err = PTR_ERR(va);
		goto out_vm;
	}

	vb = i915_vma_instance(B, &ppgtt->vm, NULL);
	if (IS_ERR(vb)) {
		err = PTR_ERR(vb);
		goto out_vm;
	}

	err = 0;
	for_each_engine(engine, gt, id) {
		struct i915_gem_ww_ctx ww;
		struct intel_context *ce;
		int bit;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		/* Swap in the ppGTT under test so va/vb are mapped through it */
		i915_vm_put(ce->vm);
		ce->vm = i915_vm_get(&ppgtt->vm);

		for_i915_gem_ww(&ww, err, true)
			err = intel_context_pin_ww(ce, &ww);
		if (err)
			goto err_put;

		for_each_set_bit(bit,
				 (unsigned long *)&RUNTIME_INFO(gt->i915)->page_sizes,
				 BITS_PER_TYPE(RUNTIME_INFO(gt->i915)->page_sizes)) {
			unsigned int len;

			if (BIT_ULL(bit) < i915_vm_obj_min_alignment(va->vm, va->obj))
				continue;

			/* sanitycheck the semaphore wake up */
			err = pte_tlbinv(ce, va, va,
					 BIT_ULL(bit),
					 NULL, SZ_4K,
					 &prng);
			if (err)
				goto err_unpin;

			/* Walk invalidation lengths from 4 bytes up to the full ppGTT */
			for (len = 2; len <= ppgtt_size; len = min(2 * len, ppgtt_size)) {
				err = pte_tlbinv(ce, va, vb,
						 BIT_ULL(bit),
						 tlbinv,
						 BIT_ULL(len),
						 &prng);
				if (err)
					goto err_unpin;
				if (len == ppgtt_size)
					break;
			}
		}
err_unpin:
		intel_context_unpin(ce);
err_put:
		intel_context_put(ce);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

out_vm:
	i915_vm_put(&ppgtt->vm);
out_b:
	i915_gem_object_put(B);
out_a:
	i915_gem_object_put(A);
	return err;
}

static void tlbinv_full(struct i915_address_space *vm, u64 addr, u64 length)
{
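	/* Invalidate every TLB entry on the GT, regardless of the requested range */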
	intel_gt_invalidate_tlb_full(vm->gt, intel_gt_tlb_seqno(vm->gt) | 1);
}

static int invalidate_full(void *arg)
{
	struct intel_gt *gt = arg;
	int err;

	if (GRAPHICS_VER(gt->i915) < 8)
		return 0; /* TLB invalidate not implemented */

	err = mem_tlbinv(gt, create_smem, tlbinv_full);
	if (err == 0)
		err = mem_tlbinv(gt, create_lmem, tlbinv_full);
	if (err == -ENODEV || err == -ENXIO)
		err = 0;

	return err;
}

int intel_tlb_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(invalidate_full),
	};
	struct intel_gt *gt;
	unsigned int i;

	for_each_gt(gt, i915, i) {
		int err;

		if (intel_gt_is_wedged(gt))
			continue;

		err = intel_gt_live_subtests(tests, gt);
		if (err)
			return err;
	}

	return 0;
}