// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "i915_selftest.h"

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"

#include "gen8_engine_cs.h"
#include "i915_gem_ww.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_context.h"
#include "intel_gt.h"
#include "intel_ring.h"

#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"

static void vma_set_qw(struct i915_vma *vma, u64 addr, u64 val)
{
	GEM_BUG_ON(addr < i915_vma_offset(vma));
	GEM_BUG_ON(addr >= i915_vma_offset(vma) + i915_vma_size(vma) + sizeof(val));
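	/* Write the value through the object's CPU mapping at the offset matching the GPU address */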
	memset64(page_mask_bits(vma->obj->mm.mapping) +
		 (addr - i915_vma_offset(vma)), val, 1);
}

static int
pte_tlbinv(struct intel_context *ce,
	   struct i915_vma *va,
	   struct i915_vma *vb,
	   u64 align,
	   void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length),
	   u64 length,
	   struct rnd_state *prng)
{
	const unsigned int pat_index =
		i915_gem_get_pat_index(ce->vm->i915, I915_CACHE_NONE);
	struct drm_i915_gem_object *batch;
	struct drm_mm_node vb_node;
	struct i915_request *rq;
	struct i915_vma *vma;
	u64 addr;
	int err;
	u32 *cs;

	batch = i915_gem_object_create_internal(ce->vm->i915, 4096);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	vma = i915_vma_instance(batch, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto out;

	/* Pin va at a random but aligned offset after vma */
	addr = round_up(vma->node.start + vma->node.size, align);
	/* MI_CONDITIONAL_BATCH_BUFFER_END limits the address to 48b */
	addr = igt_random_offset(prng, addr, min(ce->vm->total, BIT_ULL(48)),
				 va->size, align);
	err = i915_vma_pin(va, 0, 0, addr | PIN_OFFSET_FIXED | PIN_USER);
	if (err) {
		pr_err("Cannot pin at %llx+%llx\n", addr, va->size);
		goto out;
	}
	GEM_BUG_ON(i915_vma_offset(va) != addr);
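	/*
	 * When testing with a second vma, alias vb over the drm_mm node that
	 * va already occupies, so that binding vb later rewrites the very
	 * same PTEs the spinner is reading through.
	 */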
	if (vb != va) {
		vb_node = vb->node;
		vb->node = va->node; /* overwrites the _same_ PTE */
	}

	/*
	 * Now choose a random dword within the 1st pinned page.
	 *
	 * SZ_64K pages on dg1 require that the whole PT be marked as
	 * containing 64KiB entries. So we make sure that the vma covers
	 * the whole PT, despite being randomly aligned to 64KiB, and
	 * restrict our sampling to the 2MiB PT within which we know we
	 * will be using 64KiB pages.
	 */
	if (align == SZ_64K)
		addr = round_up(addr, SZ_2M);
	addr = igt_random_offset(prng, addr, addr + align, 8, 8);

	if (va != vb)
		pr_info("%s(%s): Sampling %llx, with alignment %llx, using PTE size %x (phys %x, sg %x), invalidate:%llx+%llx\n",
			ce->engine->name, va->obj->mm.region->name ?: "smem",
			addr, align, va->resource->page_sizes_gtt,
			va->page_sizes.phys, va->page_sizes.sg,
			addr & -length, length);

	cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto out_va;
	}
	*cs++ = MI_NOOP; /* for later termination */
	/*
	 * Sample the target to see if we spot the updated backing store.
	 * Gen8 VCS compares the immediate value with the bitwise AND of two
	 * consecutive DWORDs at addr; other gens/engines compare the value
	 * with the single DWORD at addr. Moreover we want to exercise
	 * DWORD-sized invalidations. The values below were chosen to satisfy
	 * all of these requirements.
	 */
	*cs++ = MI_CONDITIONAL_BATCH_BUFFER_END | MI_DO_COMPARE | 2;
	*cs++ = 0; /* break if *addr == 0 */
	*cs++ = lower_32_bits(addr);
	*cs++ = upper_32_bits(addr);
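	/*
	 * va reads back as all-ones, so the conditional batch keeps looping;
	 * vb reads back as zero, so sampling it terminates the batch.
	 */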
	vma_set_qw(va, addr, -1);
	vma_set_qw(vb, addr, 0);

	/* Keep sampling until we get bored */
	*cs++ = MI_BATCH_BUFFER_START | BIT(8) | 1;
	*cs++ = lower_32_bits(i915_vma_offset(vma));
	*cs++ = upper_32_bits(i915_vma_offset(vma));

	i915_gem_object_flush_map(batch);

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_va;
	}

	err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), 0, 0);
	if (err) {
		i915_request_add(rq);
		goto out_va;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	/*
	 * Short sleep to sanitycheck the batch is spinning before we begin.
	 * FIXME: Why is GSC so slow?
	 */
	if (ce->engine->class == OTHER_CLASS)
		msleep(200);
	else
		msleep(10);

	if (va == vb) {
		if (!i915_request_completed(rq)) {
			pr_err("%s(%s): Semaphore sanitycheck failed %llx, with alignment %llx, using PTE size %x (phys %x, sg %x)\n",
			       ce->engine->name, va->obj->mm.region->name ?: "smem",
			       addr, align, va->resource->page_sizes_gtt,
			       va->page_sizes.phys, va->page_sizes.sg);
			err = -EIO;
		}
	} else if (!i915_request_completed(rq)) {
		struct i915_vma_resource vb_res = {
			.bi.pages = vb->obj->mm.pages,
			.bi.page_sizes = vb->obj->mm.page_sizes,
			.start = i915_vma_offset(vb),
			.vma_size = i915_vma_size(vb)
		};
		unsigned int pte_flags = 0;

		/* Flip the PTE between A and B */
		if (i915_gem_object_is_lmem(vb->obj))
			pte_flags |= PTE_LM;
		ce->vm->insert_entries(ce->vm, &vb_res, pat_index, pte_flags);

		/* Flush the PTE update to concurrent HW */
		tlbinv(ce->vm, addr & -length, length);

		if (wait_for(i915_request_completed(rq), HZ / 2)) {
			pr_err("%s: Request did not complete; the COND_BBE did not read the updated PTE\n",
			       ce->engine->name);
			err = -EINVAL;
		}
	} else {
		pr_err("Spinner ended unexpectedly\n");
		err = -EIO;
	}
	i915_request_put(rq);

	cs = page_mask_bits(batch->mm.mapping);
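	/* Replace the leading MI_NOOP with a terminator so the loop exits */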
	*cs = MI_BATCH_BUFFER_END;
	wmb();

out_va:
	if (vb != va)
		vb->node = vb_node;
	i915_vma_unpin(va);
	if (i915_vma_unbind_unlocked(va))
		err = -EIO;
out:
	i915_gem_object_put(batch);
	return err;
}

static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
{
	struct intel_memory_region *mr = gt->i915->mm.regions[INTEL_REGION_LMEM_0];
	resource_size_t size = SZ_1G;

	/*
	 * Allocating the largest possible page size allows us to test all
	 * types of pages. To succeed with both allocations, especially in
	 * the case of Small BAR, try to allocate no more than a quarter of
	 * mappable memory.
	 */
	if (mr && size > resource_size(&mr->io) / 4)
		size = resource_size(&mr->io) / 4;

	return i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CONTIGUOUS);
}

static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)
{
	/*
	 * SZ_64K pages require covering the whole 2M PT (gen8 to tgl/dg1).
	 * While that does not require the whole 2M block to be contiguous,
	 * it is easier to make it so, since we need that for SZ_2M pages.
	 * Since we randomly offset the start of the vma, we need a 4M object
	 * so that there is a 2M range within it that is suitable for SZ_64K PTEs.
	 */
	return i915_gem_object_create_internal(gt->i915, SZ_4M);
}

static int
mem_tlbinv(struct intel_gt *gt,
	   struct drm_i915_gem_object *(*create_fn)(struct intel_gt *),
	   void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length))
{
	unsigned int ppgtt_size = RUNTIME_INFO(gt->i915)->ppgtt_size;
	struct intel_engine_cs *engine;
	struct drm_i915_gem_object *A, *B;
	struct i915_ppgtt *ppgtt;
	struct i915_vma *va, *vb;
	enum intel_engine_id id;
	I915_RND_STATE(prng);
	void *vaddr;
	int err;

	/*
	 * Check that the TLB invalidate is able to revoke an active
	 * page. We load a page into a spinning COND_BBE loop and then
	 * remap that page to a new physical address. The old address is
	 * retained in the TLB cache (and so the loop keeps spinning) until
	 * we issue an invalidate.
	 */

	A = create_fn(gt);
	if (IS_ERR(A))
		return PTR_ERR(A);

	vaddr = i915_gem_object_pin_map_unlocked(A, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_a;
	}

	B = create_fn(gt);
	if (IS_ERR(B)) {
		err = PTR_ERR(B);
		goto out_a;
	}

	vaddr = i915_gem_object_pin_map_unlocked(B, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_b;
	}

	GEM_BUG_ON(A->base.size != B->base.size);
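	/*
	 * Ideally both objects are backed by a single contiguous allocation
	 * so that every PTE size up to the object size can be exercised;
	 * warn if the region could not provide that.
	 */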
	if ((A->mm.page_sizes.phys | B->mm.page_sizes.phys) & (A->base.size - 1))
		pr_warn("Failed to allocate contiguous pages for size %zx\n",
			A->base.size);

	ppgtt = i915_ppgtt_create(gt, 0);
	if (IS_ERR(ppgtt)) {
		err = PTR_ERR(ppgtt);
		goto out_b;
	}

	va = i915_vma_instance(A, &ppgtt->vm, NULL);
	if (IS_ERR(va)) {
		err = PTR_ERR(va);
		goto out_vm;
	}

	vb = i915_vma_instance(B, &ppgtt->vm, NULL);
	if (IS_ERR(vb)) {
		err = PTR_ERR(vb);
		goto out_vm;
	}

	err = 0;
	for_each_engine(engine, gt, id) {
		struct i915_gem_ww_ctx ww;
		struct intel_context *ce;
		int bit;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		/* Swap in the ppGTT under test so va/vb are mapped through it */
		i915_vm_put(ce->vm);
		ce->vm = i915_vm_get(&ppgtt->vm);

		for_i915_gem_ww(&ww, err, true)
			err = intel_context_pin_ww(ce, &ww);
		if (err)
			goto err_put;

		for_each_set_bit(bit,
				 (unsigned long *)&RUNTIME_INFO(gt->i915)->page_sizes,
				 BITS_PER_TYPE(RUNTIME_INFO(gt->i915)->page_sizes)) {
			unsigned int len;

			if (BIT_ULL(bit) < i915_vm_obj_min_alignment(va->vm, va->obj))
				continue;

			/* sanitycheck the semaphore wake up */
			err = pte_tlbinv(ce, va, va,
					 BIT_ULL(bit),
					 NULL, SZ_4K,
					 &prng);
			if (err)
				goto err_unpin;

			/* Walk invalidation lengths from 4 bytes up to the full ppGTT */
			for (len = 2; len <= ppgtt_size; len = min(2 * len, ppgtt_size)) {
				err = pte_tlbinv(ce, va, vb,
						 BIT_ULL(bit),
						 tlbinv,
						 BIT_ULL(len),
						 &prng);
				if (err)
					goto err_unpin;
				if (len == ppgtt_size)
					break;
			}
		}
err_unpin:
		intel_context_unpin(ce);
err_put:
		intel_context_put(ce);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

out_vm:
	i915_vm_put(&ppgtt->vm);
out_b:
	i915_gem_object_put(B);
out_a:
	i915_gem_object_put(A);
	return err;
}

static void tlbinv_full(struct i915_address_space *vm, u64 addr, u64 length)
{
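	/* Invalidate every TLB entry on the GT, regardless of the requested range */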
	intel_gt_invalidate_tlb_full(vm->gt, intel_gt_tlb_seqno(vm->gt) | 1);
}

static int invalidate_full(void *arg)
{
	struct intel_gt *gt = arg;
	int err;

	if (GRAPHICS_VER(gt->i915) < 8)
		return 0; /* TLB invalidate not implemented */

	err = mem_tlbinv(gt, create_smem, tlbinv_full);
	if (err == 0)
		err = mem_tlbinv(gt, create_lmem, tlbinv_full);
	if (err == -ENODEV || err == -ENXIO)
		err = 0;

	return err;
}

int intel_tlb_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(invalidate_full),
	};
	struct intel_gt *gt;
	unsigned int i;

	for_each_gt(gt, i915, i) {
		int err;

		if (intel_gt_is_wedged(gt))
			continue;

		err = intel_gt_live_subtests(tests, gt);
		if (err)
			return err;
	}

	return 0;
}