// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "i915_selftest.h"

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"

#include "gen8_engine_cs.h"
#include "i915_gem_ww.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_context.h"
#include "intel_gt.h"
#include "intel_ring.h"

#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"

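/*
 * Write a quadword through the object's CPU mapping at the offset that
 * corresponds to GTT address @addr within @vma.
 */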
static void vma_set_qw(struct i915_vma *vma, u64 addr, u64 val)
{
	GEM_BUG_ON(addr < i915_vma_offset(vma));
	GEM_BUG_ON(addr >= i915_vma_offset(vma) + i915_vma_size(vma) + sizeof(val));
	memset64(page_mask_bits(vma->obj->mm.mapping) +
		 (addr - i915_vma_offset(vma)), val, 1);
}

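/*
 * Check a single PTE invalidation: bind @va at a random (but @align'ed)
 * address, start a batch that spins on a COND_BBE sampling that address,
 * then rewrite the PTEs to point at @vb and call @tlbinv. va reads back -1
 * (keep spinning) while vb reads back 0 (stop), so the request can only
 * complete if the stale translation for va was actually invalidated.
 * Passing @va for @vb turns this into a sanitycheck that the spinner wakes
 * up at all.
 */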
static int
pte_tlbinv(struct intel_context *ce,
	   struct i915_vma *va,
	   struct i915_vma *vb,
	   u64 align,
	   void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length),
	   u64 length,
	   struct rnd_state *prng)
{
	const unsigned int pat_index =
		i915_gem_get_pat_index(ce->vm->i915, I915_CACHE_NONE);
	struct drm_i915_gem_object *batch;
	struct drm_mm_node vb_node;
	struct i915_request *rq;
	struct i915_vma *vma;
	u64 addr;
	int err;
	u32 *cs;

	batch = i915_gem_object_create_internal(ce->vm->i915, 4096);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	vma = i915_vma_instance(batch, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto out;

	/* Pin va at random but aligned offset after vma */
	addr = round_up(vma->node.start + vma->node.size, align);
	/* MI_CONDITIONAL_BATCH_BUFFER_END limits address to 48b */
	addr = igt_random_offset(prng, addr, min(ce->vm->total, BIT_ULL(48)),
				 va->size, align);
	err = i915_vma_pin(va, 0, 0, addr | PIN_OFFSET_FIXED | PIN_USER);
	if (err) {
		pr_err("Cannot pin at %llx+%llx\n", addr, va->size);
		goto out;
	}
	GEM_BUG_ON(i915_vma_offset(va) != addr);
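	/*
	 * When testing with two distinct objects, temporarily alias vb over
	 * va's drm_mm node so that a later rebind of vb rewrites exactly the
	 * PTEs that currently translate to va. The original node is restored
	 * before unpinning.
	 */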
	if (vb != va) {
		vb_node = vb->node;
		vb->node = va->node; /* overwrites the _same_ PTE */
	}

	/*
	 * Now choose random dword at the 1st pinned page.
	 *
	 * SZ_64K pages on dg1 require that the whole PT be marked as
	 * containing 64KiB entries. So we make sure that the vma covers the
	 * whole PT, despite being randomly aligned to 64KiB, and restrict
	 * our sampling to the 2MiB PT within which we know that we will be
	 * using 64KiB pages.
	 */
	if (align == SZ_64K)
		addr = round_up(addr, SZ_2M);
	addr = igt_random_offset(prng, addr, addr + align, 8, 8);

	if (va != vb)
		pr_info("%s(%s): Sampling %llx, with alignment %llx, using PTE size %x (phys %x, sg %x), invalidate:%llx+%llx\n",
			ce->engine->name, va->obj->mm.region->name ?: "smem",
			addr, align, va->resource->page_sizes_gtt,
			va->page_sizes.phys, va->page_sizes.sg,
			addr & -length, length);

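	/*
	 * Build the spinner: COND_BBE samples the value at @addr and ends the
	 * batch once it reads zero; otherwise we jump back to the start.
	 * va's copy of the sample is seeded with -1 (keep spinning) and vb's
	 * with 0 (stop), so completion requires observing the new PTE.
	 */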
	cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto out_va;
	}
	*cs++ = MI_NOOP; /* for later termination */
	/*
	 * Sample the target to see if we spot the updated backing store.
	 * Gen8 VCS compares the immediate value with the bitwise-AND of two
	 * consecutive DWORDs pointed to by addr; other gens/engines compare
	 * the value with the single DWORD pointed to by addr. Moreover, we
	 * want to exercise DWORD-sized invalidations. The values below were
	 * chosen to fulfil all of these requirements.
	 */
	*cs++ = MI_CONDITIONAL_BATCH_BUFFER_END | MI_DO_COMPARE | 2;
	*cs++ = 0; /* break if *addr == 0 */
	*cs++ = lower_32_bits(addr);
	*cs++ = upper_32_bits(addr);
	vma_set_qw(va, addr, -1);
	vma_set_qw(vb, addr, 0);

	/* Keep sampling until we get bored */
	*cs++ = MI_BATCH_BUFFER_START | BIT(8) | 1;
	*cs++ = lower_32_bits(i915_vma_offset(vma));
	*cs++ = upper_32_bits(i915_vma_offset(vma));

	i915_gem_object_flush_map(batch);

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_va;
	}

	err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), 0, 0);
	if (err) {
		i915_request_add(rq);
		goto out_va;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	/*
	 * Short sleep to sanitycheck the batch is spinning before we begin.
	 * FIXME: Why is GSC so slow?
	 */
	if (ce->engine->class == OTHER_CLASS)
		msleep(200);
	else
		msleep(10);

	if (va == vb) {
		if (!i915_request_completed(rq)) {
			pr_err("%s(%s): Semaphore sanitycheck failed %llx, with alignment %llx, using PTE size %x (phys %x, sg %x)\n",
			       ce->engine->name, va->obj->mm.region->name ?: "smem",
			       addr, align, va->resource->page_sizes_gtt,
			       va->page_sizes.phys, va->page_sizes.sg);
			err = -EIO;
		}
	} else if (!i915_request_completed(rq)) {
		struct i915_vma_resource vb_res = {
			.bi.pages = vb->obj->mm.pages,
			.bi.page_sizes = vb->obj->mm.page_sizes,
			.start = i915_vma_offset(vb),
			.vma_size = i915_vma_size(vb)
		};
		unsigned int pte_flags = 0;

		/* Flip the PTE between A and B */
		if (i915_gem_object_is_lmem(vb->obj))
			pte_flags |= PTE_LM;
		ce->vm->insert_entries(ce->vm, &vb_res, pat_index, pte_flags);

		/* Flush the PTE update to concurrent HW */
		tlbinv(ce->vm, addr & -length, length);

		if (wait_for(i915_request_completed(rq), HZ / 2)) {
			pr_err("%s: Request did not complete; the COND_BBE did not read the updated PTE\n",
			       ce->engine->name);
			err = -EINVAL;
		}
	} else {
		pr_err("Spinner ended unexpectedly\n");
		err = -EIO;
	}
	i915_request_put(rq);

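	/*
	 * Whatever the outcome, stop the spinner by rewriting the leading
	 * MI_NOOP as MI_BATCH_BUFFER_END so the next loop iteration
	 * terminates the batch.
	 */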
	cs = page_mask_bits(batch->mm.mapping);
	*cs = MI_BATCH_BUFFER_END;
	wmb();

out_va:
	if (vb != va)
		vb->node = vb_node;
	i915_vma_unpin(va);
	if (i915_vma_unbind_unlocked(va))
		err = -EIO;
out:
	i915_gem_object_put(batch);
	return err;
}

static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
{
	struct intel_memory_region *mr = gt->i915->mm.regions[INTEL_REGION_LMEM_0];
	resource_size_t size = SZ_1G;

	/*
	 * Allocation of the largest possible page size allows us to test all
	 * types of pages. To succeed with both allocations, especially in the
	 * case of Small BAR, try to allocate no more than a quarter of the
	 * mappable memory.
	 */
	if (mr && size > resource_size(&mr->io) / 4)
		size = resource_size(&mr->io) / 4;

	return i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CONTIGUOUS);
}

static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)
{
	/*
	 * SZ_64K pages require covering the whole 2M PT (gen8 to tgl/dg1).
	 * While that does not require the whole 2M block to be contiguous,
	 * it is easier to make it so, since we need that for SZ_2M pages.
	 * Since we randomly offset the start of the vma, we need a 4M object
	 * so that there is a 2M range within it that is suitable for SZ_64K
	 * PTEs.
	 */
	return i915_gem_object_create_internal(gt->i915, SZ_4M);
}

static int
mem_tlbinv(struct intel_gt *gt,
	   struct drm_i915_gem_object *(*create_fn)(struct intel_gt *),
	   void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length))
{
	unsigned int ppgtt_size = RUNTIME_INFO(gt->i915)->ppgtt_size;
	struct intel_engine_cs *engine;
	struct drm_i915_gem_object *A, *B;
	struct i915_ppgtt *ppgtt;
	struct i915_vma *va, *vb;
	enum intel_engine_id id;
	I915_RND_STATE(prng);
	void *vaddr;
	int err;

	/*
	 * Check that the TLB invalidate is able to revoke an active
	 * page. We load a page into a spinning COND_BBE loop and then
	 * remap that page to a new physical address. The old address is
	 * retained in the TLB cache (and so the loop keeps spinning) until
	 * we issue an invalidate.
	 */

	A = create_fn(gt);
	if (IS_ERR(A))
		return PTR_ERR(A);

	vaddr = i915_gem_object_pin_map_unlocked(A, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_a;
	}

	B = create_fn(gt);
	if (IS_ERR(B)) {
		err = PTR_ERR(B);
		goto out_a;
	}

	vaddr = i915_gem_object_pin_map_unlocked(B, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_b;
	}

	GEM_BUG_ON(A->base.size != B->base.size);
	if ((A->mm.page_sizes.phys | B->mm.page_sizes.phys) & (A->base.size - 1))
		pr_warn("Failed to allocate contiguous pages for size %zx\n",
			A->base.size);

	ppgtt = i915_ppgtt_create(gt, 0);
	if (IS_ERR(ppgtt)) {
		err = PTR_ERR(ppgtt);
		goto out_b;
	}

	va = i915_vma_instance(A, &ppgtt->vm, NULL);
	if (IS_ERR(va)) {
		err = PTR_ERR(va);
		goto out_vm;
	}

	vb = i915_vma_instance(B, &ppgtt->vm, NULL);
	if (IS_ERR(vb)) {
		err = PTR_ERR(vb);
		goto out_vm;
	}

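	/*
	 * Test every engine with its own context, but rebound to the shared
	 * ppgtt so that each engine exercises the same va/vb mappings.
	 */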
	err = 0;
	for_each_engine(engine, gt, id) {
		struct i915_gem_ww_ctx ww;
		struct intel_context *ce;
		int bit;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		i915_vm_put(ce->vm);
		ce->vm = i915_vm_get(&ppgtt->vm);

		for_i915_gem_ww(&ww, err, true)
			err = intel_context_pin_ww(ce, &ww);
		if (err)
			goto err_put;

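		/*
		 * Walk each page size supported by the platform, skipping any
		 * that are below the minimum alignment for this vm/object.
		 */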
		for_each_set_bit(bit,
				 (unsigned long *)&RUNTIME_INFO(gt->i915)->page_sizes,
				 BITS_PER_TYPE(RUNTIME_INFO(gt->i915)->page_sizes)) {
			unsigned int len;

			if (BIT_ULL(bit) < i915_vm_obj_min_alignment(va->vm, va->obj))
				continue;

			/* sanitycheck the semaphore wake up */
			err = pte_tlbinv(ce, va, va,
					 BIT_ULL(bit),
					 NULL, SZ_4K,
					 &prng);
			if (err)
				goto err_unpin;

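			/*
			 * Then exercise invalidation ranges that grow rapidly
			 * (BIT_ULL(len) with the exponent doubling) from a
			 * few bytes up to the entire address space.
			 */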
			for (len = 2; len <= ppgtt_size; len = min(2 * len, ppgtt_size)) {
				err = pte_tlbinv(ce, va, vb,
						 BIT_ULL(bit),
						 tlbinv,
						 BIT_ULL(len),
						 &prng);
				if (err)
					goto err_unpin;
				if (len == ppgtt_size)
					break;
			}
		}
err_unpin:
		intel_context_unpin(ce);
err_put:
		intel_context_put(ce);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

out_vm:
	i915_vm_put(&ppgtt->vm);
out_b:
	i915_gem_object_put(B);
out_a:
	i915_gem_object_put(A);
	return err;
}

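/*
 * tlbinv callback that ignores the address range and issues a full-GT TLB
 * invalidation; the seqno is forced odd so the flush is not skipped as
 * already completed.
 */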
static void tlbinv_full(struct i915_address_space *vm, u64 addr, u64 length)
{
	intel_gt_invalidate_tlb_full(vm->gt, intel_gt_tlb_seqno(vm->gt) | 1);
}

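/*
 * Exercise full TLB invalidation against system memory and then local
 * memory; errors such as -ENODEV/-ENXIO (e.g. no local memory available)
 * are treated as nothing to test rather than a failure.
 */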
static int invalidate_full(void *arg)
{
	struct intel_gt *gt = arg;
	int err;

	if (GRAPHICS_VER(gt->i915) < 8)
		return 0; /* TLB invalidate not implemented */

	err = mem_tlbinv(gt, create_smem, tlbinv_full);
	if (err == 0)
		err = mem_tlbinv(gt, create_lmem, tlbinv_full);
	if (err == -ENODEV || err == -ENXIO)
		err = 0;

	return err;
}

int intel_tlb_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(invalidate_full),
	};
	struct intel_gt *gt;
	unsigned int i;

	for_each_gt(gt, i915, i) {
		int err;

		if (intel_gt_is_wedged(gt))
			continue;

		err = intel_gt_live_subtests(tests, gt);
		if (err)
			return err;
	}

	return 0;
}
