// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>
#include <linux/sched/mm.h>

#include <drm/drm_cache.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "i915_reg.h"
#include "i915_trace.h"
#include "i915_utils.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_gtt.h"

bool i915_ggtt_require_binder(struct drm_i915_private *i915)
{
	/* Wa_13010847436 & Wa_14019519902 */
	return !i915_direct_stolen_access(i915) &&
		MEDIA_VER_FULL(i915) == IP_VER(13, 0);
}

static bool intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
{
	return IS_BROXTON(i915) && i915_vtd_active(i915);
}

bool intel_vm_no_concurrent_access_wa(struct drm_i915_private *i915)
{
	return IS_CHERRYVIEW(i915) || intel_ggtt_update_needs_vtd_wa(i915);
}

struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	/*
	 * To avoid severe over-allocation when dealing with min_page_size
	 * restrictions, we override that behaviour here by allowing an object
	 * size and page layout which can be smaller. In practice this should be
	 * totally fine, since GTT paging structures are not typically inserted
	 * into the GTT.
	 *
	 * Note that we also hit this path for the scratch page, and for this
	 * case it might need to be 64K, but that should work fine here since we
	 * used the passed in size for the page size, which should ensure it
	 * also has the same alignment.
	 */
	obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz,
						    vm->lmem_pt_obj_flags);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;

		if (vm->fpriv)
			i915_drm_client_add_object(vm->fpriv->client, obj);
	}

	return obj;
}

struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	obj = i915_gem_object_create_internal(vm->i915, sz);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;

		if (vm->fpriv)
			i915_drm_client_add_object(vm->fpriv->client, obj);
	}

	return obj;
}
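
/*
 * Illustrative sketch only (not part of the driver): the helpers above are
 * normally used as an alloc-then-map pair, dropping the object reference on
 * failure. Assuming a caller that needs a single 4K paging-structure page:
 *
 *	struct drm_i915_gem_object *pt;
 *	int err;
 *
 *	pt = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
 *	if (IS_ERR(pt))
 *		return PTR_ERR(pt);
 *
 *	err = map_pt_dma(vm, pt);
 *	if (err) {
 *		i915_gem_object_put(pt);
 *		return err;
 *	}
 */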

int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = intel_gt_coherent_map_type(vm->gt, obj, true);
	/*
	 * FIXME: It is suspected that some Address Translation Service (ATS)
	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
	 * Applying a write barrier to the ppgtt set entry functions appeared
	 * to have no effect, so we must temporarily use I915_MAP_WC here on
	 * MTL until a proper ATS solution is found.
	 */
	if (IS_METEORLAKE(vm->i915))
		type = I915_MAP_WC;

	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = intel_gt_coherent_map_type(vm->gt, obj, true);
	/*
	 * FIXME: It is suspected that some Address Translation Service (ATS)
	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
	 * Applying a write barrier to the ppgtt set entry functions appeared
	 * to have no effect, so we must temporarily use I915_MAP_WC here on
	 * MTL until a proper ATS solution is found.
	 */
	if (IS_METEORLAKE(vm->i915))
		type = I915_MAP_WC;

	vaddr = i915_gem_object_pin_map(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

static void clear_vm_list(struct list_head *list)
{
	struct i915_vma *vma, *vn;

	list_for_each_entry_safe(vma, vn, list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		if (!i915_gem_object_get_rcu(obj)) {
			/*
			 * Object is dying, but has not yet cleared its
			 * vma list.
			 * Unbind the dying vma to ensure our list
			 * is completely drained. We leave the destruction to
			 * the object destructor to avoid the vma
			 * disappearing under it.
			 */
			atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
			WARN_ON(__i915_vma_unbind(vma));

			/* Remove from the unbound list */
			list_del_init(&vma->vm_link);

			/*
			 * Delay the vm and vm mutex freeing until the
			 * object is done with destruction.
			 */
			i915_vm_resv_get(vma->vm);
			vma->vm_ddestroy = true;
		} else {
			i915_vma_destroy_locked(vma);
			i915_gem_object_put(obj);
		}
	}
}

static void __i915_vm_close(struct i915_address_space *vm)
{
	mutex_lock(&vm->mutex);

	clear_vm_list(&vm->bound_list);
	clear_vm_list(&vm->unbound_list);

	/* Check for must-fix unanticipated side-effects */
	GEM_BUG_ON(!list_empty(&vm->bound_list));
	GEM_BUG_ON(!list_empty(&vm->unbound_list));

	mutex_unlock(&vm->mutex);
}

/* lock the vm into the current ww, if we lock one, we lock all */
int i915_vm_lock_objects(struct i915_address_space *vm,
			 struct i915_gem_ww_ctx *ww)
{
	if (vm->scratch[0]->base.resv == &vm->_resv) {
		return i915_gem_object_lock(vm->scratch[0], ww);
	} else {
		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

		/* We borrowed the scratch page from ggtt, take the top level object */
		return i915_gem_object_lock(ppgtt->pd->pt.base, ww);
	}
}
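
/*
 * Sketch of typical usage (assumed, not taken from this file): because all
 * paging structures share one dma-resv, callers take the single lock inside
 * a ww transaction and back off on -EDEADLK:
 *
 *	struct i915_gem_ww_ctx ww;
 *	int err;
 *
 *	i915_gem_ww_ctx_init(&ww, false);
 * retry:
 *	err = i915_vm_lock_objects(vm, &ww);
 *	if (!err) {
 *		... touch the vm's paging structures ...
 *	}
 *	if (err == -EDEADLK) {
 *		err = i915_gem_ww_ctx_backoff(&ww);
 *		if (!err)
 *			goto retry;
 *	}
 *	i915_gem_ww_ctx_fini(&ww);
 */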

void i915_address_space_fini(struct i915_address_space *vm)
{
	drm_mm_takedown(&vm->mm);
}

/**
 * i915_vm_resv_release - Final struct i915_address_space destructor
 * @kref: Pointer to the &i915_address_space.resv_ref member.
 *
 * This function is called when the last lock sharer no longer shares the
 * &i915_address_space._resv lock, and also if we raced with the vma
 * destruction path while destroying a vma.
 */
void i915_vm_resv_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, typeof(*vm), resv_ref);

	dma_resv_fini(&vm->_resv);
	mutex_destroy(&vm->mutex);

	kfree(vm);
}

static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, release_work);

	__i915_vm_close(vm);

	/* Synchronize async unbinds. */
	i915_vma_resource_bind_dep_sync_all(vm);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	i915_vm_resv_put(vm);
}

void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_work(vm->i915->wq, &vm->release_work);
}

void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);

	/*
	 * Special case for GGTT that has already done an early
	 * kref_init here.
	 */
	if (!kref_read(&vm->resv_ref))
		kref_init(&vm->resv_ref);

	vm->pending_unbind = RB_ROOT_CACHED;
	INIT_WORK(&vm->release_work, __i915_vm_release);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);

	if (!intel_vm_no_concurrent_access_wa(vm->i915)) {
		i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
	} else {
		/*
		 * The CHV and BXT VT-d workarounds use stop_machine(),
		 * which is allowed to allocate memory. This means &vm->mutex
		 * is the outer lock, and in theory we can allocate memory inside
		 * it through stop_machine().
		 *
		 * Add the lockdep annotation for this; the shrinker only takes
		 * this mutex via trylock.
		 */
		mutex_acquire(&vm->mutex.dep_map, 0, 0, _THIS_IP_);
		might_alloc(GFP_KERNEL);
		mutex_release(&vm->mutex.dep_map, _THIS_IP_);
	}
	dma_resv_init(&vm->_resv);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);

	memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
		 ARRAY_SIZE(vm->min_alignment));

	if (HAS_64K_PAGES(vm->i915)) {
		vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
		vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
	}

	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	INIT_LIST_HEAD(&vm->bound_list);
	INIT_LIST_HEAD(&vm->unbound_list);
}
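
/*
 * For reference, a rough sketch of the reclaim-tainting that
 * i915_gem_shrinker_taints_mutex() performs (reconstructed from memory; see
 * i915_gem_shrinker.c for the authoritative version): a dummy lock cycle is
 * taken under fs_reclaim so lockdep learns that the mutex nests inside
 * reclaim, and will then flag any allocation made while holding it:
 *
 *	fs_reclaim_acquire(GFP_KERNEL);
 *	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
 *	mutex_release(&mutex->dep_map, _RET_IP_);
 *	fs_reclaim_release(GFP_KERNEL);
 */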

void *__px_vaddr(struct drm_i915_gem_object *p)
{
	enum i915_map_type type;

	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return page_unpack_bits(p->mm.mapping, &type);
}

dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_dma_address(p->mm.pages->sgl);
}

struct page *__px_page(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_page(p->mm.pages->sgl);
}

void
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
	void *vaddr = __px_vaddr(p);

	memset64(vaddr, val, count);
	drm_clflush_virt_range(vaddr, PAGE_SIZE);
}

static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
	void *vaddr = __px_vaddr(scratch);
	u8 val;

	val = 0;
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		val = POISON_FREE;

	memset(vaddr, val, scratch->base.size);
	drm_clflush_virt_range(vaddr, scratch->base.size);
}

int setup_scratch_page(struct i915_address_space *vm)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K) &&
	    !HAS_64K_PAGES(vm->i915))
		size = I915_GTT_PAGE_SIZE_64K;

	do {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_scratch_dma(vm, size);
		if (IS_ERR(obj))
			goto skip;

		if (map_pt_dma(vm, obj))
			goto skip_obj;

		/* We need a single contiguous page for our scratch */
		if (obj->mm.page_sizes.sg < size)
			goto skip_obj;

		/* And it needs to be correspondingly aligned */
		if (__px_dma(obj) & (size - 1))
			goto skip_obj;

		/*
		 * Use a non-zero scratch page for debugging.
		 *
		 * We want a value that should be reasonably obvious
		 * to spot in the error state, while also causing a GPU hang
		 * if executed. We prefer using a clear page in production, so
		 * should it ever be accidentally used, the effect should be
		 * fairly benign.
		 */
		poison_scratch_page(obj);

		vm->scratch[0] = obj;
		vm->scratch_order = get_order(size);
		return 0;

skip_obj:
		i915_gem_object_put(obj);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
	} while (1);
}
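
/*
 * A worked example of the alignment test above: since size is a power of two,
 * (size - 1) is a mask of its low-order bits, so the DMA address is
 * size-aligned iff those bits are all clear. For size = I915_GTT_PAGE_SIZE_64K
 * (0x10000) the mask is 0xffff; a (hypothetical) address of 0x12345000 gives
 * 0x12345000 & 0xffff = 0x5000 != 0, so that object would be rejected and we
 * would fall back to a 4K scratch page.
 */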

void free_scratch(struct i915_address_space *vm)
{
	int i;

	if (!vm->scratch[0])
		return;

	for (i = 0; i <= vm->top; i++)
		i915_gem_object_put(vm->scratch[i]);
}

void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for gtt related workarounds. This function is
	 * called on driver load and after a GPU reset, so you can place
	 * workarounds here even if they get overwritten by GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (GRAPHICS_VER(i915) >= 9 && GRAPHICS_VER(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    GRAPHICS_VER(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GRAPHICS_VER(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages, and not disabling the
		 * GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		gt_WARN_ON_ONCE(gt, can_use_gtt_cache &&
				intel_uncore_read(uncore,
						  HSW_GTT_CACHE_EN) == 0);
	}
}

static void xelpmp_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(0),
			   MTL_PPAT_L4_0_WB);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(1),
			   MTL_PPAT_L4_1_WT);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(2),
			   MTL_PPAT_L4_3_UC);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(3),
			   MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(4),
			   MTL_PPAT_L4_0_WB | MTL_3_COH_2W);

	/*
	 * Remaining PAT entries are left at the hardware-default
	 * fully-cached setting
	 */
}

static void xelpg_setup_private_ppat(struct intel_gt *gt)
{
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0),
				     MTL_PPAT_L4_0_WB);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1),
				     MTL_PPAT_L4_1_WT);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2),
				     MTL_PPAT_L4_3_UC);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3),
				     MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4),
				     MTL_PPAT_L4_0_WB | MTL_3_COH_2W);

	/*
	 * Remaining PAT entries are left at the hardware-default
	 * fully-cached setting
	 */
}

static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

static void xehp_setup_private_ppat(struct intel_gt *gt)
{
	enum forcewake_domains fw;
	unsigned long flags;

	fw = intel_uncore_forcewake_for_reg(gt->uncore, _MMIO(XEHP_PAT_INDEX(0).reg),
					    FW_REG_WRITE);
	intel_uncore_forcewake_get(gt->uncore, fw);

	intel_gt_mcr_lock(gt, &flags);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
	intel_gt_mcr_unlock(gt, flags);

	intel_uncore_forcewake_put(gt->uncore, fw);
}

static void icl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* for scanout with eLLC */
	if (GRAPHICS_VER(i915) >= 9)
		pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	else
		pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}
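
/*
 * For readers unfamiliar with the packing above: GEN8_PPAT(i, x) places the
 * 8-bit encoding x into byte i of a u64 (roughly ((u64)(x) << ((i) * 8));
 * see the register definitions for the authoritative macro), so all eight
 * PAT entries fit in one 64-bit value. Entries 0-3 therefore land in
 * GEN8_PRIVATE_PAT_LO and entries 4-7 in GEN8_PRIVATE_PAT_HI via the
 * lower_32_bits()/upper_32_bits() split.
 */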

static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

void setup_private_pat(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	struct drm_i915_private *i915 = gt->i915;

	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);

	if (gt->type == GT_MEDIA) {
		xelpmp_setup_private_ppat(gt->uncore);
		return;
	}

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		xelpg_setup_private_ppat(gt);
	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
		xehp_setup_private_ppat(gt);
	else if (GRAPHICS_VER(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (GRAPHICS_VER(i915) >= 11)
		icl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}

struct i915_vma *
__vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(vm->i915, PAGE_ALIGN(size));
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

struct i915_vma *
__vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long size)
{
	struct i915_vma *vma;
	int err;

	vma = __vm_create_scratch_for_read(vm, size);
	if (IS_ERR(vma))
		return vma;

	err = i915_vma_pin(vma, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err) {
		i915_vma_put(vma);
		return ERR_PTR(err);
	}

	return vma;
}
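
/*
 * Sketch of typical (assumed) caller usage for the pinned variant: create the
 * scratch buffer, let the GPU write results into it, then drop the pin and
 * the reference:
 *
 *	vma = __vm_create_scratch_for_read_pinned(vm, PAGE_SIZE);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *
 *	... emit GPU commands that write into vma ...
 *
 *	i915_vma_unpin(vma);
 *	i915_vma_put(vma);
 */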

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif
739 | |