// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>
#include <linux/sched/mm.h>

#include <drm/drm_cache.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "i915_reg.h"
#include "i915_trace.h"
#include "i915_utils.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_gtt.h"

bool i915_ggtt_require_binder(struct drm_i915_private *i915)
{
	/* Wa_13010847436 & Wa_14019519902 */
	return !i915_direct_stolen_access(i915) &&
		MEDIA_VER_FULL(i915) == IP_VER(13, 0);
}

static bool intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
{
	return IS_BROXTON(i915) && i915_vtd_active(i915);
}

bool intel_vm_no_concurrent_access_wa(struct drm_i915_private *i915)
{
	return IS_CHERRYVIEW(i915) || intel_ggtt_update_needs_vtd_wa(i915);
}

struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	/*
	 * To avoid severe over-allocation when dealing with min_page_size
	 * restrictions, we override that behaviour here by allowing an object
	 * size and page layout which can be smaller. In practice this should be
	 * totally fine, since GTT paging structures are not typically inserted
	 * into the GTT.
	 *
	 * Note that we also hit this path for the scratch page, and for this
	 * case it might need to be 64K, but that should work fine here since we
	 * used the passed in size for the page size, which should ensure it
	 * also has the same alignment.
	 */
	obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz,
						    vm->lmem_pt_obj_flags);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;

		if (vm->fpriv)
			i915_drm_client_add_object(vm->fpriv->client, obj);
	}

	return obj;
}

struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	obj = i915_gem_object_create_internal(vm->i915, sz);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;

		if (vm->fpriv)
			i915_drm_client_add_object(vm->fpriv->client, obj);
	}

	return obj;
}
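
/*
 * Illustrative sketch only (not part of the driver): the helpers above are
 * normally used as an alloc-then-map pair, dropping the object reference on
 * failure. Assuming a caller that needs a single 4K paging-structure page:
 *
 *	struct drm_i915_gem_object *pt;
 *	int err;
 *
 *	pt = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
 *	if (IS_ERR(pt))
 *		return PTR_ERR(pt);
 *
 *	err = map_pt_dma(vm, pt);
 *	if (err) {
 *		i915_gem_object_put(pt);
 *		return err;
 *	}
 */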

int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = intel_gt_coherent_map_type(vm->gt, obj, true);
	/*
	 * FIXME: It is suspected that some Address Translation Service (ATS)
	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
	 * Applying a write barrier to the ppgtt set entry functions appeared
	 * to have no effect, so we must temporarily use I915_MAP_WC here on
	 * MTL until a proper ATS solution is found.
	 */
	if (IS_METEORLAKE(vm->i915))
		type = I915_MAP_WC;

	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = intel_gt_coherent_map_type(vm->gt, obj, true);
	/*
	 * FIXME: It is suspected that some Address Translation Service (ATS)
	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
	 * Applying a write barrier to the ppgtt set entry functions appeared
	 * to have no effect, so we must temporarily use I915_MAP_WC here on
	 * MTL until a proper ATS solution is found.
	 */
	if (IS_METEORLAKE(vm->i915))
		type = I915_MAP_WC;

	vaddr = i915_gem_object_pin_map(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

static void clear_vm_list(struct list_head *list)
{
	struct i915_vma *vma, *vn;

	list_for_each_entry_safe(vma, vn, list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		if (!i915_gem_object_get_rcu(obj)) {
			/*
			 * Object is dying, but has not yet cleared its
			 * vma list.
			 * Unbind the dying vma to ensure our list
			 * is completely drained. We leave the destruction to
			 * the object destructor to avoid the vma
			 * disappearing under it.
			 */
			atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
			WARN_ON(__i915_vma_unbind(vma));

			/* Remove from the unbound list */
			list_del_init(&vma->vm_link);

			/*
			 * Delay the vm and vm mutex freeing until the
			 * object is done with destruction.
			 */
			i915_vm_resv_get(vma->vm);
			vma->vm_ddestroy = true;
		} else {
			i915_vma_destroy_locked(vma);
			i915_gem_object_put(obj);
		}
	}
}

static void __i915_vm_close(struct i915_address_space *vm)
{
	mutex_lock(&vm->mutex);

	clear_vm_list(&vm->bound_list);
	clear_vm_list(&vm->unbound_list);

	/* Check for must-fix unanticipated side-effects */
	GEM_BUG_ON(!list_empty(&vm->bound_list));
	GEM_BUG_ON(!list_empty(&vm->unbound_list));

	mutex_unlock(&vm->mutex);
}

/* lock the vm into the current ww, if we lock one, we lock all */
int i915_vm_lock_objects(struct i915_address_space *vm,
			 struct i915_gem_ww_ctx *ww)
{
	if (vm->scratch[0]->base.resv == &vm->_resv) {
		return i915_gem_object_lock(vm->scratch[0], ww);
	} else {
		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

		/* We borrowed the scratch page from ggtt, take the top level object */
		return i915_gem_object_lock(ppgtt->pd->pt.base, ww);
	}
}
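
/*
 * Sketch of typical usage (assumed, not taken from this file): because all
 * paging structures share one dma-resv, callers take the single lock inside
 * a ww transaction and back off on -EDEADLK:
 *
 *	struct i915_gem_ww_ctx ww;
 *	int err;
 *
 *	i915_gem_ww_ctx_init(&ww, false);
 * retry:
 *	err = i915_vm_lock_objects(vm, &ww);
 *	if (!err) {
 *		... touch the vm's paging structures ...
 *	}
 *	if (err == -EDEADLK) {
 *		err = i915_gem_ww_ctx_backoff(&ww);
 *		if (!err)
 *			goto retry;
 *	}
 *	i915_gem_ww_ctx_fini(&ww);
 */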

void i915_address_space_fini(struct i915_address_space *vm)
{
	drm_mm_takedown(&vm->mm);
}

/**
 * i915_vm_resv_release - Final struct i915_address_space destructor
 * @kref: Pointer to the &i915_address_space.resv_ref member.
 *
 * This function is called when the last lock sharer no longer shares the
 * &i915_address_space._resv lock, and also if we raced with the vma
 * destruction path while destroying a vma.
 */
void i915_vm_resv_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, typeof(*vm), resv_ref);

	dma_resv_fini(&vm->_resv);
	mutex_destroy(&vm->mutex);

	kfree(vm);
}

static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, release_work);

	__i915_vm_close(vm);

	/* Synchronize async unbinds. */
	i915_vma_resource_bind_dep_sync_all(vm);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	i915_vm_resv_put(vm);
}

void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_work(vm->i915->wq, &vm->release_work);
}

void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);

	/*
	 * Special case for GGTT that has already done an early
	 * kref_init here.
	 */
	if (!kref_read(&vm->resv_ref))
		kref_init(&vm->resv_ref);

	vm->pending_unbind = RB_ROOT_CACHED;
	INIT_WORK(&vm->release_work, __i915_vm_release);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);

	if (!intel_vm_no_concurrent_access_wa(vm->i915)) {
		i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
	} else {
		/*
		 * The CHV and BXT VT-d workarounds use stop_machine(),
		 * which is allowed to allocate memory. This means &vm->mutex
		 * is the outer lock, and in theory we can allocate memory inside
		 * it through stop_machine().
		 *
		 * Add the lockdep annotation for this; the shrinker only takes
		 * this mutex via trylock.
		 */
		mutex_acquire(&vm->mutex.dep_map, 0, 0, _THIS_IP_);
		might_alloc(GFP_KERNEL);
		mutex_release(&vm->mutex.dep_map, _THIS_IP_);
	}
	dma_resv_init(&vm->_resv);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);

	memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
		 ARRAY_SIZE(vm->min_alignment));

	if (HAS_64K_PAGES(vm->i915)) {
		vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
		vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
	}

	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	INIT_LIST_HEAD(&vm->bound_list);
	INIT_LIST_HEAD(&vm->unbound_list);
}
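
/*
 * For reference, a rough sketch of the reclaim-tainting that
 * i915_gem_shrinker_taints_mutex() performs (reconstructed from memory; see
 * i915_gem_shrinker.c for the authoritative version): a dummy lock cycle is
 * taken under fs_reclaim so lockdep learns that the mutex nests inside
 * reclaim, and will then flag any allocation made while holding it:
 *
 *	fs_reclaim_acquire(GFP_KERNEL);
 *	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
 *	mutex_release(&mutex->dep_map, _RET_IP_);
 *	fs_reclaim_release(GFP_KERNEL);
 */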

void *__px_vaddr(struct drm_i915_gem_object *p)
{
	enum i915_map_type type;

	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return page_unpack_bits(p->mm.mapping, &type);
}

dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_dma_address(p->mm.pages->sgl);
}

struct page *__px_page(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_page(p->mm.pages->sgl);
}

void
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
	void *vaddr = __px_vaddr(p);

	memset64(vaddr, val, count);
	drm_clflush_virt_range(vaddr, PAGE_SIZE);
}

static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
	void *vaddr = __px_vaddr(scratch);
	u8 val;

	val = 0;
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		val = POISON_FREE;

	memset(vaddr, val, scratch->base.size);
	drm_clflush_virt_range(vaddr, scratch->base.size);
}

int setup_scratch_page(struct i915_address_space *vm)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K) &&
	    !HAS_64K_PAGES(vm->i915))
		size = I915_GTT_PAGE_SIZE_64K;

	do {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_scratch_dma(vm, size);
		if (IS_ERR(obj))
			goto skip;

		if (map_pt_dma(vm, obj))
			goto skip_obj;

		/* We need a single contiguous page for our scratch */
		if (obj->mm.page_sizes.sg < size)
			goto skip_obj;

		/* And it needs to be correspondingly aligned */
		if (__px_dma(obj) & (size - 1))
			goto skip_obj;

		/*
		 * Use a non-zero scratch page for debugging.
		 *
		 * We want a value that should be reasonably obvious
		 * to spot in the error state, while also causing a GPU hang
		 * if executed. We prefer using a clear page in production, so
		 * should it ever be accidentally used, the effect should be
		 * fairly benign.
		 */
		poison_scratch_page(obj);

		vm->scratch[0] = obj;
		vm->scratch_order = get_order(size);
		return 0;

skip_obj:
		i915_gem_object_put(obj);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
	} while (1);
}
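
/*
 * A worked example of the alignment test above: since size is a power of two,
 * (size - 1) is a mask of its low-order bits, so the DMA address is
 * size-aligned iff those bits are all clear. For size = I915_GTT_PAGE_SIZE_64K
 * (0x10000) the mask is 0xffff; a (hypothetical) address of 0x12345000 gives
 * 0x12345000 & 0xffff = 0x5000 != 0, so that object would be rejected and we
 * would fall back to a 4K scratch page.
 */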

void free_scratch(struct i915_address_space *vm)
{
	int i;

	if (!vm->scratch[0])
		return;

	for (i = 0; i <= vm->top; i++)
		i915_gem_object_put(vm->scratch[i]);
}

void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for gtt related workarounds. This function is
	 * called on driver load and after a GPU reset, so you can place
	 * workarounds here even if they get overwritten by GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (GRAPHICS_VER(i915) >= 9 && GRAPHICS_VER(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    GRAPHICS_VER(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GRAPHICS_VER(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages, and not disabling the
		 * GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		gt_WARN_ON_ONCE(gt, can_use_gtt_cache &&
				intel_uncore_read(uncore,
						  HSW_GTT_CACHE_EN) == 0);
	}
}

static void xelpmp_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(0),
			   MTL_PPAT_L4_0_WB);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(1),
			   MTL_PPAT_L4_1_WT);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(2),
			   MTL_PPAT_L4_3_UC);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(3),
			   MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(4),
			   MTL_PPAT_L4_0_WB | MTL_3_COH_2W);

	/*
	 * Remaining PAT entries are left at the hardware-default
	 * fully-cached setting
	 */
}

static void xelpg_setup_private_ppat(struct intel_gt *gt)
{
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0),
				     MTL_PPAT_L4_0_WB);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1),
				     MTL_PPAT_L4_1_WT);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2),
				     MTL_PPAT_L4_3_UC);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3),
				     MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4),
				     MTL_PPAT_L4_0_WB | MTL_3_COH_2W);

	/*
	 * Remaining PAT entries are left at the hardware-default
	 * fully-cached setting
	 */
}

static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

static void xehp_setup_private_ppat(struct intel_gt *gt)
{
	enum forcewake_domains fw;
	unsigned long flags;

	fw = intel_uncore_forcewake_for_reg(gt->uncore, _MMIO(XEHP_PAT_INDEX(0).reg),
					    FW_REG_WRITE);
	intel_uncore_forcewake_get(gt->uncore, fw);

	intel_gt_mcr_lock(gt, &flags);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
	intel_gt_mcr_unlock(gt, flags);

	intel_uncore_forcewake_put(gt->uncore, fw);
}

static void icl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* for scanout with eLLC */
	if (GRAPHICS_VER(i915) >= 9)
		pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	else
		pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}
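
/*
 * For readers unfamiliar with the packing above: GEN8_PPAT(i, x) places the
 * 8-bit encoding x into byte i of a u64 (roughly ((u64)(x) << ((i) * 8));
 * see the register definitions for the authoritative macro), so all eight
 * PAT entries fit in one 64-bit value. Entries 0-3 therefore land in
 * GEN8_PRIVATE_PAT_LO and entries 4-7 in GEN8_PRIVATE_PAT_HI via the
 * lower_32_bits()/upper_32_bits() split.
 */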

static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

void setup_private_pat(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	struct drm_i915_private *i915 = gt->i915;

	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);

	if (gt->type == GT_MEDIA) {
		xelpmp_setup_private_ppat(gt->uncore);
		return;
	}

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		xelpg_setup_private_ppat(gt);
	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
		xehp_setup_private_ppat(gt);
	else if (GRAPHICS_VER(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (GRAPHICS_VER(i915) >= 11)
		icl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}

struct i915_vma *
__vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(vm->i915, PAGE_ALIGN(size));
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

struct i915_vma *
__vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long size)
{
	struct i915_vma *vma;
	int err;

	vma = __vm_create_scratch_for_read(vm, size);
	if (IS_ERR(vma))
		return vma;

	err = i915_vma_pin(vma, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err) {
		i915_vma_put(vma);
		return ERR_PTR(err);
	}

	return vma;
}
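
/*
 * Sketch of typical (assumed) caller usage for the pinned variant: create the
 * scratch buffer, let the GPU write results into it, then drop the pin and
 * the reference:
 *
 *	vma = __vm_create_scratch_for_read_pinned(vm, PAGE_SIZE);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *
 *	... emit GPU commands that write into vma ...
 *
 *	i915_vma_unpin(vma);
 *	i915_vma_put(vma);
 */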

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif
739 | |