1 | // SPDX-License-Identifier: MIT |
2 | /* |
3 | * Copyright © 2020 Intel Corporation |
4 | */ |
5 | |
6 | #include <linux/log2.h> |
7 | |
8 | #include "gem/i915_gem_internal.h" |
9 | #include "gem/i915_gem_lmem.h" |
10 | |
11 | #include "gen8_ppgtt.h" |
12 | #include "i915_scatterlist.h" |
13 | #include "i915_trace.h" |
14 | #include "i915_pvinfo.h" |
15 | #include "i915_vgpu.h" |
16 | #include "intel_gt.h" |
17 | #include "intel_gtt.h" |
18 | |
19 | static u64 gen8_pde_encode(const dma_addr_t addr, |
20 | const enum i915_cache_level level) |
21 | { |
22 | u64 pde = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; |
23 | |
24 | if (level != I915_CACHE_NONE) |
25 | pde |= PPAT_CACHED_PDE; |
26 | else |
27 | pde |= PPAT_UNCACHED; |
28 | |
29 | return pde; |
30 | } |
31 | |
32 | static u64 gen8_pte_encode(dma_addr_t addr, |
33 | unsigned int pat_index, |
34 | u32 flags) |
35 | { |
36 | gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; |
37 | |
38 | if (unlikely(flags & PTE_READ_ONLY)) |
39 | pte &= ~GEN8_PAGE_RW; |
40 | |
41 | /* |
42 | * For pre-gen12 platforms pat_index is the same as enum |
43 | * i915_cache_level, so the switch-case here is still valid. |
44 | * See translation table defined by LEGACY_CACHELEVEL. |
45 | */ |
46 | switch (pat_index) { |
47 | case I915_CACHE_NONE: |
48 | pte |= PPAT_UNCACHED; |
49 | break; |
50 | case I915_CACHE_WT: |
51 | pte |= PPAT_DISPLAY_ELLC; |
52 | break; |
53 | default: |
54 | pte |= PPAT_CACHED; |
55 | break; |
56 | } |
57 | |
58 | return pte; |
59 | } |
60 | |
61 | static u64 gen12_pte_encode(dma_addr_t addr, |
62 | unsigned int pat_index, |
63 | u32 flags) |
64 | { |
65 | gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; |
66 | |
67 | if (unlikely(flags & PTE_READ_ONLY)) |
68 | pte &= ~GEN8_PAGE_RW; |
69 | |
70 | if (flags & PTE_LM) |
71 | pte |= GEN12_PPGTT_PTE_LM; |
72 | |
73 | if (pat_index & BIT(0)) |
74 | pte |= GEN12_PPGTT_PTE_PAT0; |
75 | |
76 | if (pat_index & BIT(1)) |
77 | pte |= GEN12_PPGTT_PTE_PAT1; |
78 | |
79 | if (pat_index & BIT(2)) |
80 | pte |= GEN12_PPGTT_PTE_PAT2; |
81 | |
82 | if (pat_index & BIT(3)) |
83 | pte |= MTL_PPGTT_PTE_PAT3; |
84 | |
85 | return pte; |
86 | } |
87 | |
88 | static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) |
89 | { |
90 | struct drm_i915_private *i915 = ppgtt->vm.i915; |
91 | struct intel_uncore *uncore = ppgtt->vm.gt->uncore; |
92 | enum vgt_g2v_type msg; |
93 | int i; |
94 | |
95 | if (create) |
96 | atomic_inc(px_used(ppgtt->pd)); /* never remove */ |
97 | else |
98 | atomic_dec(px_used(ppgtt->pd)); |
99 | |
100 | mutex_lock(&i915->vgpu.lock); |
101 | |
102 | if (i915_vm_is_4lvl(vm: &ppgtt->vm)) { |
103 | const u64 daddr = px_dma(ppgtt->pd); |
104 | |
105 | intel_uncore_write(uncore, |
106 | vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); |
107 | intel_uncore_write(uncore, |
108 | vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); |
109 | |
110 | msg = create ? |
111 | VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : |
112 | VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY; |
113 | } else { |
114 | for (i = 0; i < GEN8_3LVL_PDPES; i++) { |
115 | const u64 daddr = i915_page_dir_dma_addr(ppgtt, n: i); |
116 | |
117 | intel_uncore_write(uncore, |
118 | vgtif_reg(pdp[i].lo), |
119 | lower_32_bits(daddr)); |
120 | intel_uncore_write(uncore, |
121 | vgtif_reg(pdp[i].hi), |
122 | upper_32_bits(daddr)); |
123 | } |
124 | |
125 | msg = create ? |
126 | VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : |
127 | VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY; |
128 | } |
129 | |
130 | /* g2v_notify atomically (via hv trap) consumes the message packet. */ |
131 | intel_uncore_write(uncore, vgtif_reg(g2v_notify), val: msg); |
132 | |
133 | mutex_unlock(lock: &i915->vgpu.lock); |
134 | } |
135 | |
/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
/* Bit shift of level @lvl's index within a page-frame-number style index */
#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
/* Directory-entry index at level @lvl for pfn-index @i */
#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
/* As gen8_pd_shift/gen8_pd_index, but operating on byte addresses */
#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))

/* Recover the i915_page_directory embedding a given i915_page_table */
#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)
146 | |
147 | static unsigned int |
148 | gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx) |
149 | { |
150 | const int shift = gen8_pd_shift(lvl); |
151 | const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); |
152 | |
153 | GEM_BUG_ON(start >= end); |
154 | end += ~mask >> gen8_pd_shift(1); |
155 | |
156 | *idx = i915_pde_index(addr: start, shift); |
157 | if ((start ^ end) & mask) |
158 | return GEN8_PDES - *idx; |
159 | else |
160 | return i915_pde_index(addr: end, shift) - *idx; |
161 | } |
162 | |
163 | static bool gen8_pd_contains(u64 start, u64 end, int lvl) |
164 | { |
165 | const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); |
166 | |
167 | GEM_BUG_ON(start >= end); |
168 | return (start ^ end) & mask && (start & ~mask) == 0; |
169 | } |
170 | |
171 | static unsigned int gen8_pt_count(u64 start, u64 end) |
172 | { |
173 | GEM_BUG_ON(start >= end); |
174 | if ((start ^ end) >> gen8_pd_shift(1)) |
175 | return GEN8_PDES - (start & (GEN8_PDES - 1)); |
176 | else |
177 | return end - start; |
178 | } |
179 | |
180 | static unsigned int gen8_pd_top_count(const struct i915_address_space *vm) |
181 | { |
182 | unsigned int shift = __gen8_pte_shift(vm->top); |
183 | |
184 | return (vm->total + (1ull << shift) - 1) >> shift; |
185 | } |
186 | |
187 | static struct i915_page_directory * |
188 | gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx) |
189 | { |
190 | struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); |
191 | |
192 | if (vm->top == 2) |
193 | return ppgtt->pd; |
194 | else |
195 | return i915_pd_entry(pdp: ppgtt->pd, gen8_pd_index(idx, vm->top)); |
196 | } |
197 | |
198 | static struct i915_page_directory * |
199 | gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr) |
200 | { |
201 | return gen8_pdp_for_page_index(vm, idx: addr >> GEN8_PTE_SHIFT); |
202 | } |
203 | |
204 | static void __gen8_ppgtt_cleanup(struct i915_address_space *vm, |
205 | struct i915_page_directory *pd, |
206 | int count, int lvl) |
207 | { |
208 | if (lvl) { |
209 | void **pde = pd->entry; |
210 | |
211 | do { |
212 | if (!*pde) |
213 | continue; |
214 | |
215 | __gen8_ppgtt_cleanup(vm, pd: *pde, GEN8_PDES, lvl: lvl - 1); |
216 | } while (pde++, --count); |
217 | } |
218 | |
219 | free_px(vm, pt: &pd->pt, lvl); |
220 | } |
221 | |
222 | static void gen8_ppgtt_cleanup(struct i915_address_space *vm) |
223 | { |
224 | struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); |
225 | |
226 | if (vm->rsvd.obj) |
227 | i915_gem_object_put(obj: vm->rsvd.obj); |
228 | |
229 | if (intel_vgpu_active(i915: vm->i915)) |
230 | gen8_ppgtt_notify_vgt(ppgtt, create: false); |
231 | |
232 | if (ppgtt->pd) |
233 | __gen8_ppgtt_cleanup(vm, pd: ppgtt->pd, |
234 | count: gen8_pd_top_count(vm), lvl: vm->top); |
235 | |
236 | free_scratch(vm); |
237 | } |
238 | |
239 | static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, |
240 | struct i915_page_directory * const pd, |
241 | u64 start, const u64 end, int lvl) |
242 | { |
243 | const struct drm_i915_gem_object * const scratch = vm->scratch[lvl]; |
244 | unsigned int idx, len; |
245 | |
246 | GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); |
247 | |
248 | len = gen8_pd_range(start, end, lvl: lvl--, idx: &idx); |
249 | GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n" , |
250 | __func__, vm, lvl + 1, start, end, |
251 | idx, len, atomic_read(px_used(pd))); |
252 | GEM_BUG_ON(!len || len >= atomic_read(px_used(pd))); |
253 | |
254 | do { |
255 | struct i915_page_table *pt = pd->entry[idx]; |
256 | |
257 | if (atomic_fetch_inc(v: &pt->used) >> gen8_pd_shift(1) && |
258 | gen8_pd_contains(start, end, lvl)) { |
259 | GTT_TRACE("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n" , |
260 | __func__, vm, lvl + 1, idx, start, end); |
261 | clear_pd_entry(pd, idx, scratch); |
262 | __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl); |
263 | start += (u64)I915_PDES << gen8_pd_shift(lvl); |
264 | continue; |
265 | } |
266 | |
267 | if (lvl) { |
268 | start = __gen8_ppgtt_clear(vm, as_pd(pt), |
269 | start, end, lvl); |
270 | } else { |
271 | unsigned int count; |
272 | unsigned int pte = gen8_pd_index(start, 0); |
273 | unsigned int num_ptes; |
274 | u64 *vaddr; |
275 | |
276 | count = gen8_pt_count(start, end); |
277 | GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n" , |
278 | __func__, vm, lvl, start, end, |
279 | gen8_pd_index(start, 0), count, |
280 | atomic_read(&pt->used)); |
281 | GEM_BUG_ON(!count || count >= atomic_read(&pt->used)); |
282 | |
283 | num_ptes = count; |
284 | if (pt->is_compact) { |
285 | GEM_BUG_ON(num_ptes % 16); |
286 | GEM_BUG_ON(pte % 16); |
287 | num_ptes /= 16; |
288 | pte /= 16; |
289 | } |
290 | |
291 | vaddr = px_vaddr(pt); |
292 | memset64(s: vaddr + pte, |
293 | v: vm->scratch[0]->encode, |
294 | n: num_ptes); |
295 | |
296 | atomic_sub(i: count, v: &pt->used); |
297 | start += count; |
298 | } |
299 | |
300 | if (release_pd_entry(pd, idx, pt, scratch)) |
301 | free_px(vm, pt, lvl); |
302 | } while (idx++, --len); |
303 | |
304 | return start; |
305 | } |
306 | |
307 | static void gen8_ppgtt_clear(struct i915_address_space *vm, |
308 | u64 start, u64 length) |
309 | { |
310 | GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); |
311 | GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); |
312 | GEM_BUG_ON(range_overflows(start, length, vm->total)); |
313 | |
314 | start >>= GEN8_PTE_SHIFT; |
315 | length >>= GEN8_PTE_SHIFT; |
316 | GEM_BUG_ON(length == 0); |
317 | |
318 | __gen8_ppgtt_clear(vm, pd: i915_vm_to_ppgtt(vm)->pd, |
319 | start, end: start + length, lvl: vm->top); |
320 | } |
321 | |
322 | static void __gen8_ppgtt_alloc(struct i915_address_space * const vm, |
323 | struct i915_vm_pt_stash *stash, |
324 | struct i915_page_directory * const pd, |
325 | u64 * const start, const u64 end, int lvl) |
326 | { |
327 | unsigned int idx, len; |
328 | |
329 | GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); |
330 | |
331 | len = gen8_pd_range(start: *start, end, lvl: lvl--, idx: &idx); |
332 | GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n" , |
333 | __func__, vm, lvl + 1, *start, end, |
334 | idx, len, atomic_read(px_used(pd))); |
335 | GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1)); |
336 | |
337 | spin_lock(lock: &pd->lock); |
338 | GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */ |
339 | do { |
340 | struct i915_page_table *pt = pd->entry[idx]; |
341 | |
342 | if (!pt) { |
343 | spin_unlock(lock: &pd->lock); |
344 | |
345 | GTT_TRACE("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n" , |
346 | __func__, vm, lvl + 1, idx); |
347 | |
348 | pt = stash->pt[!!lvl]; |
349 | __i915_gem_object_pin_pages(obj: pt->base); |
350 | |
351 | fill_px(pt, vm->scratch[lvl]->encode); |
352 | |
353 | spin_lock(lock: &pd->lock); |
354 | if (likely(!pd->entry[idx])) { |
355 | stash->pt[!!lvl] = pt->stash; |
356 | atomic_set(v: &pt->used, i: 0); |
357 | set_pd_entry(pd, idx, pt); |
358 | } else { |
359 | pt = pd->entry[idx]; |
360 | } |
361 | } |
362 | |
363 | if (lvl) { |
364 | atomic_inc(v: &pt->used); |
365 | spin_unlock(lock: &pd->lock); |
366 | |
367 | __gen8_ppgtt_alloc(vm, stash, |
368 | as_pd(pt), start, end, lvl); |
369 | |
370 | spin_lock(lock: &pd->lock); |
371 | atomic_dec(v: &pt->used); |
372 | GEM_BUG_ON(!atomic_read(&pt->used)); |
373 | } else { |
374 | unsigned int count = gen8_pt_count(start: *start, end); |
375 | |
376 | GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n" , |
377 | __func__, vm, lvl, *start, end, |
378 | gen8_pd_index(*start, 0), count, |
379 | atomic_read(&pt->used)); |
380 | |
381 | atomic_add(i: count, v: &pt->used); |
382 | /* All other pdes may be simultaneously removed */ |
383 | GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES); |
384 | *start += count; |
385 | } |
386 | } while (idx++, --len); |
387 | spin_unlock(lock: &pd->lock); |
388 | } |
389 | |
390 | static void gen8_ppgtt_alloc(struct i915_address_space *vm, |
391 | struct i915_vm_pt_stash *stash, |
392 | u64 start, u64 length) |
393 | { |
394 | GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); |
395 | GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); |
396 | GEM_BUG_ON(range_overflows(start, length, vm->total)); |
397 | |
398 | start >>= GEN8_PTE_SHIFT; |
399 | length >>= GEN8_PTE_SHIFT; |
400 | GEM_BUG_ON(length == 0); |
401 | |
402 | __gen8_ppgtt_alloc(vm, stash, pd: i915_vm_to_ppgtt(vm)->pd, |
403 | start: &start, end: start + length, lvl: vm->top); |
404 | } |
405 | |
406 | static void __gen8_ppgtt_foreach(struct i915_address_space *vm, |
407 | struct i915_page_directory *pd, |
408 | u64 *start, u64 end, int lvl, |
409 | void (*fn)(struct i915_address_space *vm, |
410 | struct i915_page_table *pt, |
411 | void *data), |
412 | void *data) |
413 | { |
414 | unsigned int idx, len; |
415 | |
416 | len = gen8_pd_range(start: *start, end, lvl: lvl--, idx: &idx); |
417 | |
418 | spin_lock(lock: &pd->lock); |
419 | do { |
420 | struct i915_page_table *pt = pd->entry[idx]; |
421 | |
422 | atomic_inc(v: &pt->used); |
423 | spin_unlock(lock: &pd->lock); |
424 | |
425 | if (lvl) { |
426 | __gen8_ppgtt_foreach(vm, as_pd(pt), start, end, lvl, |
427 | fn, data); |
428 | } else { |
429 | fn(vm, pt, data); |
430 | *start += gen8_pt_count(start: *start, end); |
431 | } |
432 | |
433 | spin_lock(lock: &pd->lock); |
434 | atomic_dec(v: &pt->used); |
435 | } while (idx++, --len); |
436 | spin_unlock(lock: &pd->lock); |
437 | } |
438 | |
439 | static void gen8_ppgtt_foreach(struct i915_address_space *vm, |
440 | u64 start, u64 length, |
441 | void (*fn)(struct i915_address_space *vm, |
442 | struct i915_page_table *pt, |
443 | void *data), |
444 | void *data) |
445 | { |
446 | start >>= GEN8_PTE_SHIFT; |
447 | length >>= GEN8_PTE_SHIFT; |
448 | |
449 | __gen8_ppgtt_foreach(vm, pd: i915_vm_to_ppgtt(vm)->pd, |
450 | start: &start, end: start + length, lvl: vm->top, |
451 | fn, data); |
452 | } |
453 | |
454 | static __always_inline u64 |
455 | gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, |
456 | struct i915_page_directory *pdp, |
457 | struct sgt_dma *iter, |
458 | u64 idx, |
459 | unsigned int pat_index, |
460 | u32 flags) |
461 | { |
462 | struct i915_page_directory *pd; |
463 | const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, pat_index, flags); |
464 | gen8_pte_t *vaddr; |
465 | |
466 | pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); |
467 | vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1))); |
468 | do { |
469 | GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE); |
470 | vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma; |
471 | |
472 | iter->dma += I915_GTT_PAGE_SIZE; |
473 | if (iter->dma >= iter->max) { |
474 | iter->sg = __sg_next(sg: iter->sg); |
475 | if (!iter->sg || sg_dma_len(iter->sg) == 0) { |
476 | idx = 0; |
477 | break; |
478 | } |
479 | |
480 | iter->dma = sg_dma_address(iter->sg); |
481 | iter->max = iter->dma + sg_dma_len(iter->sg); |
482 | } |
483 | |
484 | if (gen8_pd_index(++idx, 0) == 0) { |
485 | if (gen8_pd_index(idx, 1) == 0) { |
486 | /* Limited by sg length for 3lvl */ |
487 | if (gen8_pd_index(idx, 2) == 0) |
488 | break; |
489 | |
490 | pd = pdp->entry[gen8_pd_index(idx, 2)]; |
491 | } |
492 | |
493 | drm_clflush_virt_range(addr: vaddr, PAGE_SIZE); |
494 | vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1))); |
495 | } |
496 | } while (1); |
497 | drm_clflush_virt_range(addr: vaddr, PAGE_SIZE); |
498 | |
499 | return idx; |
500 | } |
501 | |
502 | static void |
503 | xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm, |
504 | struct i915_vma_resource *vma_res, |
505 | struct sgt_dma *iter, |
506 | unsigned int pat_index, |
507 | u32 flags) |
508 | { |
509 | const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); |
510 | unsigned int rem = sg_dma_len(iter->sg); |
511 | u64 start = vma_res->start; |
512 | u64 end = start + vma_res->vma_size; |
513 | |
514 | GEM_BUG_ON(!i915_vm_is_4lvl(vm)); |
515 | |
516 | do { |
517 | struct i915_page_directory * const pdp = |
518 | gen8_pdp_for_page_address(vm, addr: start); |
519 | struct i915_page_directory * const pd = |
520 | i915_pd_entry(pdp, __gen8_pte_index(start, 2)); |
521 | struct i915_page_table *pt = |
522 | i915_pt_entry(pd, __gen8_pte_index(start, 1)); |
523 | gen8_pte_t encode = pte_encode; |
524 | unsigned int page_size; |
525 | gen8_pte_t *vaddr; |
526 | u16 index, max, nent, i; |
527 | |
528 | max = I915_PDES; |
529 | nent = 1; |
530 | |
531 | if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M && |
532 | IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && |
533 | rem >= I915_GTT_PAGE_SIZE_2M && |
534 | !__gen8_pte_index(start, 0)) { |
535 | index = __gen8_pte_index(start, 1); |
536 | encode |= GEN8_PDE_PS_2M; |
537 | page_size = I915_GTT_PAGE_SIZE_2M; |
538 | |
539 | vaddr = px_vaddr(pd); |
540 | } else { |
541 | index = __gen8_pte_index(start, 0); |
542 | page_size = I915_GTT_PAGE_SIZE; |
543 | |
544 | if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { |
545 | /* |
546 | * Device local-memory on these platforms should |
547 | * always use 64K pages or larger (including GTT |
548 | * alignment), therefore if we know the whole |
549 | * page-table needs to be filled we can always |
550 | * safely use the compact-layout. Otherwise fall |
551 | * back to the TLB hint with PS64. If this is |
552 | * system memory we only bother with PS64. |
553 | */ |
554 | if ((encode & GEN12_PPGTT_PTE_LM) && |
555 | end - start >= SZ_2M && !index) { |
556 | index = __gen8_pte_index(start, 0) / 16; |
557 | page_size = I915_GTT_PAGE_SIZE_64K; |
558 | |
559 | max /= 16; |
560 | |
561 | vaddr = px_vaddr(pd); |
562 | vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K; |
563 | |
564 | pt->is_compact = true; |
565 | } else if (IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && |
566 | rem >= I915_GTT_PAGE_SIZE_64K && |
567 | !(index % 16)) { |
568 | encode |= GEN12_PTE_PS64; |
569 | page_size = I915_GTT_PAGE_SIZE_64K; |
570 | nent = 16; |
571 | } |
572 | } |
573 | |
574 | vaddr = px_vaddr(pt); |
575 | } |
576 | |
577 | do { |
578 | GEM_BUG_ON(rem < page_size); |
579 | |
580 | for (i = 0; i < nent; i++) { |
581 | vaddr[index++] = |
582 | encode | (iter->dma + i * |
583 | I915_GTT_PAGE_SIZE); |
584 | } |
585 | |
586 | start += page_size; |
587 | iter->dma += page_size; |
588 | rem -= page_size; |
589 | if (iter->dma >= iter->max) { |
590 | iter->sg = __sg_next(sg: iter->sg); |
591 | if (!iter->sg) |
592 | break; |
593 | |
594 | rem = sg_dma_len(iter->sg); |
595 | if (!rem) |
596 | break; |
597 | |
598 | iter->dma = sg_dma_address(iter->sg); |
599 | iter->max = iter->dma + rem; |
600 | |
601 | if (unlikely(!IS_ALIGNED(iter->dma, page_size))) |
602 | break; |
603 | } |
604 | } while (rem >= page_size && index < max); |
605 | |
606 | drm_clflush_virt_range(addr: vaddr, PAGE_SIZE); |
607 | vma_res->page_sizes_gtt |= page_size; |
608 | } while (iter->sg && sg_dma_len(iter->sg)); |
609 | } |
610 | |
611 | static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, |
612 | struct i915_vma_resource *vma_res, |
613 | struct sgt_dma *iter, |
614 | unsigned int pat_index, |
615 | u32 flags) |
616 | { |
617 | const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); |
618 | unsigned int rem = sg_dma_len(iter->sg); |
619 | u64 start = vma_res->start; |
620 | |
621 | GEM_BUG_ON(!i915_vm_is_4lvl(vm)); |
622 | |
623 | do { |
624 | struct i915_page_directory * const pdp = |
625 | gen8_pdp_for_page_address(vm, addr: start); |
626 | struct i915_page_directory * const pd = |
627 | i915_pd_entry(pdp, __gen8_pte_index(start, 2)); |
628 | gen8_pte_t encode = pte_encode; |
629 | unsigned int maybe_64K = -1; |
630 | unsigned int page_size; |
631 | gen8_pte_t *vaddr; |
632 | u16 index; |
633 | |
634 | if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M && |
635 | IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && |
636 | rem >= I915_GTT_PAGE_SIZE_2M && |
637 | !__gen8_pte_index(start, 0)) { |
638 | index = __gen8_pte_index(start, 1); |
639 | encode |= GEN8_PDE_PS_2M; |
640 | page_size = I915_GTT_PAGE_SIZE_2M; |
641 | |
642 | vaddr = px_vaddr(pd); |
643 | } else { |
644 | struct i915_page_table *pt = |
645 | i915_pt_entry(pd, __gen8_pte_index(start, 1)); |
646 | |
647 | index = __gen8_pte_index(start, 0); |
648 | page_size = I915_GTT_PAGE_SIZE; |
649 | |
650 | if (!index && |
651 | vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K && |
652 | IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && |
653 | (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || |
654 | rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)) |
655 | maybe_64K = __gen8_pte_index(start, 1); |
656 | |
657 | vaddr = px_vaddr(pt); |
658 | } |
659 | |
660 | do { |
661 | GEM_BUG_ON(sg_dma_len(iter->sg) < page_size); |
662 | vaddr[index++] = encode | iter->dma; |
663 | |
664 | start += page_size; |
665 | iter->dma += page_size; |
666 | rem -= page_size; |
667 | if (iter->dma >= iter->max) { |
668 | iter->sg = __sg_next(sg: iter->sg); |
669 | if (!iter->sg) |
670 | break; |
671 | |
672 | rem = sg_dma_len(iter->sg); |
673 | if (!rem) |
674 | break; |
675 | |
676 | iter->dma = sg_dma_address(iter->sg); |
677 | iter->max = iter->dma + rem; |
678 | |
679 | if (maybe_64K != -1 && index < I915_PDES && |
680 | !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && |
681 | (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || |
682 | rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))) |
683 | maybe_64K = -1; |
684 | |
685 | if (unlikely(!IS_ALIGNED(iter->dma, page_size))) |
686 | break; |
687 | } |
688 | } while (rem >= page_size && index < I915_PDES); |
689 | |
690 | drm_clflush_virt_range(addr: vaddr, PAGE_SIZE); |
691 | |
692 | /* |
693 | * Is it safe to mark the 2M block as 64K? -- Either we have |
694 | * filled whole page-table with 64K entries, or filled part of |
695 | * it and have reached the end of the sg table and we have |
696 | * enough padding. |
697 | */ |
698 | if (maybe_64K != -1 && |
699 | (index == I915_PDES || |
700 | (i915_vm_has_scratch_64K(vm) && |
701 | !iter->sg && IS_ALIGNED(vma_res->start + |
702 | vma_res->node_size, |
703 | I915_GTT_PAGE_SIZE_2M)))) { |
704 | vaddr = px_vaddr(pd); |
705 | vaddr[maybe_64K] |= GEN8_PDE_IPS_64K; |
706 | drm_clflush_virt_range(addr: vaddr, PAGE_SIZE); |
707 | page_size = I915_GTT_PAGE_SIZE_64K; |
708 | |
709 | /* |
710 | * We write all 4K page entries, even when using 64K |
711 | * pages. In order to verify that the HW isn't cheating |
712 | * by using the 4K PTE instead of the 64K PTE, we want |
713 | * to remove all the surplus entries. If the HW skipped |
714 | * the 64K PTE, it will read/write into the scratch page |
715 | * instead - which we detect as missing results during |
716 | * selftests. |
717 | */ |
718 | if (I915_SELFTEST_ONLY(vm->scrub_64K)) { |
719 | u16 i; |
720 | |
721 | encode = vm->scratch[0]->encode; |
722 | vaddr = px_vaddr(i915_pt_entry(pd, maybe_64K)); |
723 | |
724 | for (i = 1; i < index; i += 16) |
725 | memset64(s: vaddr + i, v: encode, n: 15); |
726 | |
727 | drm_clflush_virt_range(addr: vaddr, PAGE_SIZE); |
728 | } |
729 | } |
730 | |
731 | vma_res->page_sizes_gtt |= page_size; |
732 | } while (iter->sg && sg_dma_len(iter->sg)); |
733 | } |
734 | |
735 | static void gen8_ppgtt_insert(struct i915_address_space *vm, |
736 | struct i915_vma_resource *vma_res, |
737 | unsigned int pat_index, |
738 | u32 flags) |
739 | { |
740 | struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); |
741 | struct sgt_dma iter = sgt_dma(vma_res); |
742 | |
743 | if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) { |
744 | if (GRAPHICS_VER_FULL(vm->i915) >= IP_VER(12, 50)) |
745 | xehpsdv_ppgtt_insert_huge(vm, vma_res, iter: &iter, pat_index, flags); |
746 | else |
747 | gen8_ppgtt_insert_huge(vm, vma_res, iter: &iter, pat_index, flags); |
748 | } else { |
749 | u64 idx = vma_res->start >> GEN8_PTE_SHIFT; |
750 | |
751 | do { |
752 | struct i915_page_directory * const pdp = |
753 | gen8_pdp_for_page_index(vm, idx); |
754 | |
755 | idx = gen8_ppgtt_insert_pte(ppgtt, pdp, iter: &iter, idx, |
756 | pat_index, flags); |
757 | } while (idx); |
758 | |
759 | vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE; |
760 | } |
761 | } |
762 | |
763 | static void gen8_ppgtt_insert_entry(struct i915_address_space *vm, |
764 | dma_addr_t addr, |
765 | u64 offset, |
766 | unsigned int pat_index, |
767 | u32 flags) |
768 | { |
769 | u64 idx = offset >> GEN8_PTE_SHIFT; |
770 | struct i915_page_directory * const pdp = |
771 | gen8_pdp_for_page_index(vm, idx); |
772 | struct i915_page_directory *pd = |
773 | i915_pd_entry(pdp, gen8_pd_index(idx, 2)); |
774 | struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1)); |
775 | gen8_pte_t *vaddr; |
776 | |
777 | GEM_BUG_ON(pt->is_compact); |
778 | |
779 | vaddr = px_vaddr(pt); |
780 | vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, pat_index, flags); |
781 | drm_clflush_virt_range(addr: &vaddr[gen8_pd_index(idx, 0)], length: sizeof(*vaddr)); |
782 | } |
783 | |
/*
 * Insert a single 64K entry into local memory using the compact layout,
 * where a 64K entry occupies slot index/16 of the page table. The parent
 * PDE is tagged GEN12_PDE_64K the first time the table is switched to
 * the compact layout. Caller must supply 64K-aligned @addr and @offset.
 */
static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
					    dma_addr_t addr,
					    u64 offset,
					    unsigned int pat_index,
					    u32 flags)
{
	u64 idx = offset >> GEN8_PTE_SHIFT;
	struct i915_page_directory * const pdp =
		gen8_pdp_for_page_index(vm, idx);
	struct i915_page_directory *pd =
		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
	gen8_pte_t *vaddr;

	GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
	GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));

	/* XXX: we don't strictly need to use this layout */

	if (!pt->is_compact) {
		/* First use: flag the PDE and remember the layout switch. */
		vaddr = px_vaddr(pd);
		vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
		pt->is_compact = true;
	}

	vaddr = px_vaddr(pt);
	vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, pat_index, flags);
}
812 | |
813 | static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm, |
814 | dma_addr_t addr, |
815 | u64 offset, |
816 | unsigned int pat_index, |
817 | u32 flags) |
818 | { |
819 | if (flags & PTE_LM) |
820 | return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset, |
821 | pat_index, flags); |
822 | |
823 | return gen8_ppgtt_insert_entry(vm, addr, offset, pat_index, flags); |
824 | } |
825 | |
826 | static int gen8_init_scratch(struct i915_address_space *vm) |
827 | { |
828 | u32 pte_flags; |
829 | int ret; |
830 | int i; |
831 | |
832 | /* |
833 | * If everybody agrees to not to write into the scratch page, |
834 | * we can reuse it for all vm, keeping contexts and processes separate. |
835 | */ |
836 | if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) { |
837 | struct i915_address_space *clone = vm->gt->vm; |
838 | |
839 | GEM_BUG_ON(!clone->has_read_only); |
840 | |
841 | vm->scratch_order = clone->scratch_order; |
842 | for (i = 0; i <= vm->top; i++) |
843 | vm->scratch[i] = i915_gem_object_get(obj: clone->scratch[i]); |
844 | |
845 | return 0; |
846 | } |
847 | |
848 | ret = setup_scratch_page(vm); |
849 | if (ret) |
850 | return ret; |
851 | |
852 | pte_flags = vm->has_read_only; |
853 | if (i915_gem_object_is_lmem(obj: vm->scratch[0])) |
854 | pte_flags |= PTE_LM; |
855 | |
856 | vm->scratch[0]->encode = |
857 | vm->pte_encode(px_dma(vm->scratch[0]), |
858 | i915_gem_get_pat_index(i915: vm->i915, |
859 | level: I915_CACHE_NONE), |
860 | pte_flags); |
861 | |
862 | for (i = 1; i <= vm->top; i++) { |
863 | struct drm_i915_gem_object *obj; |
864 | |
865 | obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); |
866 | if (IS_ERR(ptr: obj)) { |
867 | ret = PTR_ERR(ptr: obj); |
868 | goto free_scratch; |
869 | } |
870 | |
871 | ret = map_pt_dma(vm, obj); |
872 | if (ret) { |
873 | i915_gem_object_put(obj); |
874 | goto free_scratch; |
875 | } |
876 | |
877 | fill_px(obj, vm->scratch[i - 1]->encode); |
878 | obj->encode = gen8_pde_encode(px_dma(obj), level: I915_CACHE_NONE); |
879 | |
880 | vm->scratch[i] = obj; |
881 | } |
882 | |
883 | return 0; |
884 | |
885 | free_scratch: |
886 | while (i--) |
887 | i915_gem_object_put(obj: vm->scratch[i]); |
888 | vm->scratch[0] = NULL; |
889 | return ret; |
890 | } |
891 | |
892 | static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) |
893 | { |
894 | struct i915_address_space *vm = &ppgtt->vm; |
895 | struct i915_page_directory *pd = ppgtt->pd; |
896 | unsigned int idx; |
897 | |
898 | GEM_BUG_ON(vm->top != 2); |
899 | GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES); |
900 | |
901 | for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) { |
902 | struct i915_page_directory *pde; |
903 | int err; |
904 | |
905 | pde = alloc_pd(vm); |
906 | if (IS_ERR(ptr: pde)) |
907 | return PTR_ERR(ptr: pde); |
908 | |
909 | err = map_pt_dma(vm, obj: pde->pt.base); |
910 | if (err) { |
911 | free_pd(vm, pde); |
912 | return err; |
913 | } |
914 | |
915 | fill_px(pde, vm->scratch[1]->encode); |
916 | set_pd_entry(pd, idx, pde); |
917 | atomic_inc(px_used(pde)); /* keep pinned */ |
918 | } |
919 | wmb(); |
920 | |
921 | return 0; |
922 | } |
923 | |
924 | static struct i915_page_directory * |
925 | gen8_alloc_top_pd(struct i915_address_space *vm) |
926 | { |
927 | const unsigned int count = gen8_pd_top_count(vm); |
928 | struct i915_page_directory *pd; |
929 | int err; |
930 | |
931 | GEM_BUG_ON(count > I915_PDES); |
932 | |
933 | pd = __alloc_pd(npde: count); |
934 | if (unlikely(!pd)) |
935 | return ERR_PTR(error: -ENOMEM); |
936 | |
937 | pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); |
938 | if (IS_ERR(ptr: pd->pt.base)) { |
939 | err = PTR_ERR(ptr: pd->pt.base); |
940 | pd->pt.base = NULL; |
941 | goto err_pd; |
942 | } |
943 | |
944 | err = map_pt_dma(vm, obj: pd->pt.base); |
945 | if (err) |
946 | goto err_pd; |
947 | |
948 | fill_page_dma(px_base(pd), val: vm->scratch[vm->top]->encode, count); |
949 | atomic_inc(px_used(pd)); /* mark as pinned */ |
950 | return pd; |
951 | |
952 | err_pd: |
953 | free_pd(vm, pd); |
954 | return ERR_PTR(error: err); |
955 | } |
956 | |
957 | static int gen8_init_rsvd(struct i915_address_space *vm) |
958 | { |
959 | struct drm_i915_private *i915 = vm->i915; |
960 | struct drm_i915_gem_object *obj; |
961 | struct i915_vma *vma; |
962 | int ret; |
963 | |
964 | if (!intel_gt_needs_wa_16018031267(gt: vm->gt)) |
965 | return 0; |
966 | |
967 | /* The memory will be used only by GPU. */ |
968 | obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, |
969 | I915_BO_ALLOC_VOLATILE | |
970 | I915_BO_ALLOC_GPU_ONLY); |
971 | if (IS_ERR(ptr: obj)) |
972 | obj = i915_gem_object_create_internal(i915, PAGE_SIZE); |
973 | if (IS_ERR(ptr: obj)) |
974 | return PTR_ERR(ptr: obj); |
975 | |
976 | vma = i915_vma_instance(obj, vm, NULL); |
977 | if (IS_ERR(ptr: vma)) { |
978 | ret = PTR_ERR(ptr: vma); |
979 | goto unref; |
980 | } |
981 | |
982 | ret = i915_vma_pin(vma, size: 0, alignment: 0, PIN_USER | PIN_HIGH); |
983 | if (ret) |
984 | goto unref; |
985 | |
986 | vm->rsvd.vma = i915_vma_make_unshrinkable(vma); |
987 | vm->rsvd.obj = obj; |
988 | vm->total -= vma->node.size; |
989 | return 0; |
990 | unref: |
991 | i915_gem_object_put(obj); |
992 | return ret; |
993 | } |
994 | |
/*
 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
 * with a net effect resembling a 2-level page table in normal x86 terms. Each
 * PDP represents 1GB of memory, so 4 * 512 * 512 * 4096 = 4GB covers the
 * legacy 32b address space.
 */
1002 | struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, |
1003 | unsigned long lmem_pt_obj_flags) |
1004 | { |
1005 | struct i915_page_directory *pd; |
1006 | struct i915_ppgtt *ppgtt; |
1007 | int err; |
1008 | |
1009 | ppgtt = kzalloc(size: sizeof(*ppgtt), GFP_KERNEL); |
1010 | if (!ppgtt) |
1011 | return ERR_PTR(error: -ENOMEM); |
1012 | |
1013 | ppgtt_init(ppgtt, gt, lmem_pt_obj_flags); |
1014 | ppgtt->vm.top = i915_vm_is_4lvl(vm: &ppgtt->vm) ? 3 : 2; |
1015 | ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t)); |
1016 | |
1017 | /* |
1018 | * From bdw, there is hw support for read-only pages in the PPGTT. |
1019 | * |
1020 | * Gen11 has HSDES#:1807136187 unresolved. Disable ro support |
1021 | * for now. |
1022 | * |
1023 | * Gen12 has inherited the same read-only fault issue from gen11. |
1024 | */ |
1025 | ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12); |
1026 | |
1027 | if (HAS_LMEM(gt->i915)) |
1028 | ppgtt->vm.alloc_pt_dma = alloc_pt_lmem; |
1029 | else |
1030 | ppgtt->vm.alloc_pt_dma = alloc_pt_dma; |
1031 | |
1032 | /* |
1033 | * Using SMEM here instead of LMEM has the advantage of not reserving |
1034 | * high performance memory for a "never" used filler page. It also |
1035 | * removes the device access that would be required to initialise the |
1036 | * scratch page, reducing pressure on an even scarcer resource. |
1037 | */ |
1038 | ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; |
1039 | |
1040 | if (GRAPHICS_VER(gt->i915) >= 12) |
1041 | ppgtt->vm.pte_encode = gen12_pte_encode; |
1042 | else |
1043 | ppgtt->vm.pte_encode = gen8_pte_encode; |
1044 | |
1045 | ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; |
1046 | ppgtt->vm.insert_entries = gen8_ppgtt_insert; |
1047 | if (HAS_64K_PAGES(gt->i915)) |
1048 | ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry; |
1049 | else |
1050 | ppgtt->vm.insert_page = gen8_ppgtt_insert_entry; |
1051 | ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; |
1052 | ppgtt->vm.clear_range = gen8_ppgtt_clear; |
1053 | ppgtt->vm.foreach = gen8_ppgtt_foreach; |
1054 | ppgtt->vm.cleanup = gen8_ppgtt_cleanup; |
1055 | |
1056 | err = gen8_init_scratch(vm: &ppgtt->vm); |
1057 | if (err) |
1058 | goto err_put; |
1059 | |
1060 | pd = gen8_alloc_top_pd(vm: &ppgtt->vm); |
1061 | if (IS_ERR(ptr: pd)) { |
1062 | err = PTR_ERR(ptr: pd); |
1063 | goto err_put; |
1064 | } |
1065 | ppgtt->pd = pd; |
1066 | |
1067 | if (!i915_vm_is_4lvl(vm: &ppgtt->vm)) { |
1068 | err = gen8_preallocate_top_level_pdp(ppgtt); |
1069 | if (err) |
1070 | goto err_put; |
1071 | } |
1072 | |
1073 | if (intel_vgpu_active(i915: gt->i915)) |
1074 | gen8_ppgtt_notify_vgt(ppgtt, create: true); |
1075 | |
1076 | err = gen8_init_rsvd(vm: &ppgtt->vm); |
1077 | if (err) |
1078 | goto err_put; |
1079 | |
1080 | return ppgtt; |
1081 | |
1082 | err_put: |
1083 | i915_vm_put(vm: &ppgtt->vm); |
1084 | return ERR_PTR(error: err); |
1085 | } |
1086 | |