1 | // SPDX-License-Identifier: MIT |
2 | /* |
3 | * Copyright © 2020 Intel Corporation |
4 | */ |
5 | |
6 | #include <linux/log2.h> |
7 | |
8 | #include "gem/i915_gem_internal.h" |
9 | |
10 | #include "gen6_ppgtt.h" |
11 | #include "i915_scatterlist.h" |
12 | #include "i915_trace.h" |
13 | #include "i915_vgpu.h" |
14 | #include "intel_gt_regs.h" |
15 | #include "intel_engine_regs.h" |
16 | #include "intel_gt.h" |
17 | |
18 | /* Write pde (index) from the page directory @pd to the page table @pt */ |
19 | static void gen6_write_pde(const struct gen6_ppgtt *ppgtt, |
20 | const unsigned int pde, |
21 | const struct i915_page_table *pt) |
22 | { |
23 | dma_addr_t addr = pt ? px_dma(pt) : px_dma(ppgtt->base.vm.scratch[1]); |
24 | |
25 | /* Caller needs to make sure the write completes if necessary */ |
26 | iowrite32(GEN6_PDE_ADDR_ENCODE(addr) | GEN6_PDE_VALID, |
27 | ppgtt->pd_addr + pde); |
28 | } |
29 | |
30 | void gen7_ppgtt_enable(struct intel_gt *gt) |
31 | { |
32 | struct drm_i915_private *i915 = gt->i915; |
33 | struct intel_uncore *uncore = gt->uncore; |
34 | u32 ecochk; |
35 | |
36 | intel_uncore_rmw(uncore, GAC_ECO_BITS, clear: 0, ECOBITS_PPGTT_CACHE64B); |
37 | |
38 | ecochk = intel_uncore_read(uncore, GAM_ECOCHK); |
39 | if (IS_HASWELL(i915)) { |
40 | ecochk |= ECOCHK_PPGTT_WB_HSW; |
41 | } else { |
42 | ecochk |= ECOCHK_PPGTT_LLC_IVB; |
43 | ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; |
44 | } |
45 | intel_uncore_write(uncore, GAM_ECOCHK, val: ecochk); |
46 | } |
47 | |
48 | void gen6_ppgtt_enable(struct intel_gt *gt) |
49 | { |
50 | struct intel_uncore *uncore = gt->uncore; |
51 | |
52 | intel_uncore_rmw(uncore, |
53 | GAC_ECO_BITS, |
54 | clear: 0, |
55 | ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B); |
56 | |
57 | intel_uncore_rmw(uncore, |
58 | GAB_CTL, |
59 | clear: 0, |
60 | GAB_CTL_CONT_AFTER_PAGEFAULT); |
61 | |
62 | intel_uncore_rmw(uncore, |
63 | GAM_ECOCHK, |
64 | clear: 0, |
65 | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); |
66 | |
67 | if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */ |
68 | intel_uncore_write(uncore, |
69 | GFX_MODE, |
70 | _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); |
71 | } |
72 | |
/* PPGTT support for Sandybridge/Gen6 and later */
74 | static void gen6_ppgtt_clear_range(struct i915_address_space *vm, |
75 | u64 start, u64 length) |
76 | { |
77 | struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(base: i915_vm_to_ppgtt(vm)); |
78 | const unsigned int first_entry = start / I915_GTT_PAGE_SIZE; |
79 | const gen6_pte_t scratch_pte = vm->scratch[0]->encode; |
80 | unsigned int pde = first_entry / GEN6_PTES; |
81 | unsigned int pte = first_entry % GEN6_PTES; |
82 | unsigned int num_entries = length / I915_GTT_PAGE_SIZE; |
83 | |
84 | while (num_entries) { |
85 | struct i915_page_table * const pt = |
86 | i915_pt_entry(pd: ppgtt->base.pd, n: pde++); |
87 | const unsigned int count = min(num_entries, GEN6_PTES - pte); |
88 | gen6_pte_t *vaddr; |
89 | |
90 | num_entries -= count; |
91 | |
92 | GEM_BUG_ON(count > atomic_read(&pt->used)); |
93 | if (!atomic_sub_return(i: count, v: &pt->used)) |
94 | ppgtt->scan_for_unused_pt = true; |
95 | |
96 | /* |
97 | * Note that the hw doesn't support removing PDE on the fly |
98 | * (they are cached inside the context with no means to |
99 | * invalidate the cache), so we can only reset the PTE |
100 | * entries back to scratch. |
101 | */ |
102 | |
103 | vaddr = px_vaddr(pt); |
104 | memset32(s: vaddr + pte, v: scratch_pte, n: count); |
105 | |
106 | pte = 0; |
107 | } |
108 | } |
109 | |
110 | static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, |
111 | struct i915_vma_resource *vma_res, |
112 | unsigned int pat_index, |
113 | u32 flags) |
114 | { |
115 | struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); |
116 | struct i915_page_directory * const pd = ppgtt->pd; |
117 | unsigned int first_entry = vma_res->start / I915_GTT_PAGE_SIZE; |
118 | unsigned int act_pt = first_entry / GEN6_PTES; |
119 | unsigned int act_pte = first_entry % GEN6_PTES; |
120 | const u32 pte_encode = vm->pte_encode(0, pat_index, flags); |
121 | struct sgt_dma iter = sgt_dma(vma_res); |
122 | gen6_pte_t *vaddr; |
123 | |
124 | GEM_BUG_ON(!pd->entry[act_pt]); |
125 | |
126 | vaddr = px_vaddr(i915_pt_entry(pd, act_pt)); |
127 | do { |
128 | GEM_BUG_ON(sg_dma_len(iter.sg) < I915_GTT_PAGE_SIZE); |
129 | vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); |
130 | |
131 | iter.dma += I915_GTT_PAGE_SIZE; |
132 | if (iter.dma == iter.max) { |
133 | iter.sg = __sg_next(sg: iter.sg); |
134 | if (!iter.sg || sg_dma_len(iter.sg) == 0) |
135 | break; |
136 | |
137 | iter.dma = sg_dma_address(iter.sg); |
138 | iter.max = iter.dma + sg_dma_len(iter.sg); |
139 | } |
140 | |
141 | if (++act_pte == GEN6_PTES) { |
142 | vaddr = px_vaddr(i915_pt_entry(pd, ++act_pt)); |
143 | act_pte = 0; |
144 | } |
145 | } while (1); |
146 | |
147 | vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE; |
148 | } |
149 | |
150 | static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end) |
151 | { |
152 | struct i915_page_directory * const pd = ppgtt->base.pd; |
153 | struct i915_page_table *pt; |
154 | unsigned int pde; |
155 | |
156 | start = round_down(start, SZ_64K); |
157 | end = round_up(end, SZ_64K) - start; |
158 | |
159 | mutex_lock(&ppgtt->flush); |
160 | |
161 | gen6_for_each_pde(pt, pd, start, end, pde) |
162 | gen6_write_pde(ppgtt, pde, pt); |
163 | |
164 | mb(); |
165 | ioread32(ppgtt->pd_addr + pde - 1); |
166 | gen6_ggtt_invalidate(ggtt: ppgtt->base.vm.gt->ggtt); |
167 | mb(); |
168 | |
169 | mutex_unlock(lock: &ppgtt->flush); |
170 | } |
171 | |
172 | static void gen6_alloc_va_range(struct i915_address_space *vm, |
173 | struct i915_vm_pt_stash *stash, |
174 | u64 start, u64 length) |
175 | { |
176 | struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base: i915_vm_to_ppgtt(vm)); |
177 | struct i915_page_directory * const pd = ppgtt->base.pd; |
178 | struct i915_page_table *pt; |
179 | bool flush = false; |
180 | u64 from = start; |
181 | unsigned int pde; |
182 | |
183 | spin_lock(lock: &pd->lock); |
184 | gen6_for_each_pde(pt, pd, start, length, pde) { |
185 | const unsigned int count = gen6_pte_count(addr: start, length); |
186 | |
187 | if (!pt) { |
188 | spin_unlock(lock: &pd->lock); |
189 | |
190 | pt = stash->pt[0]; |
191 | __i915_gem_object_pin_pages(obj: pt->base); |
192 | |
193 | fill32_px(pt, vm->scratch[0]->encode); |
194 | |
195 | spin_lock(lock: &pd->lock); |
196 | if (!pd->entry[pde]) { |
197 | stash->pt[0] = pt->stash; |
198 | atomic_set(v: &pt->used, i: 0); |
199 | pd->entry[pde] = pt; |
200 | } else { |
201 | pt = pd->entry[pde]; |
202 | } |
203 | |
204 | flush = true; |
205 | } |
206 | |
207 | atomic_add(i: count, v: &pt->used); |
208 | } |
209 | spin_unlock(lock: &pd->lock); |
210 | |
211 | if (flush && i915_vma_is_bound(vma: ppgtt->vma, I915_VMA_GLOBAL_BIND)) { |
212 | intel_wakeref_t wakeref; |
213 | |
214 | with_intel_runtime_pm(&vm->i915->runtime_pm, wakeref) |
215 | gen6_flush_pd(ppgtt, start: from, end: start); |
216 | } |
217 | } |
218 | |
219 | static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) |
220 | { |
221 | struct i915_address_space * const vm = &ppgtt->base.vm; |
222 | int ret; |
223 | |
224 | ret = setup_scratch_page(vm); |
225 | if (ret) |
226 | return ret; |
227 | |
228 | vm->scratch[0]->encode = |
229 | vm->pte_encode(px_dma(vm->scratch[0]), |
230 | i915_gem_get_pat_index(i915: vm->i915, |
231 | level: I915_CACHE_NONE), |
232 | PTE_READ_ONLY); |
233 | |
234 | vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); |
235 | if (IS_ERR(ptr: vm->scratch[1])) { |
236 | ret = PTR_ERR(ptr: vm->scratch[1]); |
237 | goto err_scratch0; |
238 | } |
239 | |
240 | ret = map_pt_dma(vm, obj: vm->scratch[1]); |
241 | if (ret) |
242 | goto err_scratch1; |
243 | |
244 | fill32_px(vm->scratch[1], vm->scratch[0]->encode); |
245 | |
246 | return 0; |
247 | |
248 | err_scratch1: |
249 | i915_gem_object_put(obj: vm->scratch[1]); |
250 | err_scratch0: |
251 | i915_gem_object_put(obj: vm->scratch[0]); |
252 | vm->scratch[0] = NULL; |
253 | return ret; |
254 | } |
255 | |
256 | static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) |
257 | { |
258 | struct i915_page_directory * const pd = ppgtt->base.pd; |
259 | struct i915_page_table *pt; |
260 | u32 pde; |
261 | |
262 | gen6_for_all_pdes(pt, pd, pde) |
263 | if (pt) |
264 | free_pt(&ppgtt->base.vm, pt); |
265 | } |
266 | |
267 | static void gen6_ppgtt_cleanup(struct i915_address_space *vm) |
268 | { |
269 | struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base: i915_vm_to_ppgtt(vm)); |
270 | |
271 | gen6_ppgtt_free_pd(ppgtt); |
272 | free_scratch(vm); |
273 | |
274 | if (ppgtt->base.pd) |
275 | free_pd(&ppgtt->base.vm, ppgtt->base.pd); |
276 | |
277 | mutex_destroy(lock: &ppgtt->flush); |
278 | } |
279 | |
280 | static void pd_vma_bind(struct i915_address_space *vm, |
281 | struct i915_vm_pt_stash *stash, |
282 | struct i915_vma_resource *vma_res, |
283 | unsigned int pat_index, |
284 | u32 unused) |
285 | { |
286 | struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); |
287 | struct gen6_ppgtt *ppgtt = vma_res->private; |
288 | u32 ggtt_offset = vma_res->start / I915_GTT_PAGE_SIZE; |
289 | |
290 | ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10; |
291 | ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset; |
292 | |
293 | gen6_flush_pd(ppgtt, start: 0, end: ppgtt->base.vm.total); |
294 | } |
295 | |
296 | static void pd_vma_unbind(struct i915_address_space *vm, |
297 | struct i915_vma_resource *vma_res) |
298 | { |
299 | struct gen6_ppgtt *ppgtt = vma_res->private; |
300 | struct i915_page_directory * const pd = ppgtt->base.pd; |
301 | struct i915_page_table *pt; |
302 | unsigned int pde; |
303 | |
304 | if (!ppgtt->scan_for_unused_pt) |
305 | return; |
306 | |
307 | /* Free all no longer used page tables */ |
308 | gen6_for_all_pdes(pt, ppgtt->base.pd, pde) { |
309 | if (!pt || atomic_read(v: &pt->used)) |
310 | continue; |
311 | |
312 | free_pt(&ppgtt->base.vm, pt); |
313 | pd->entry[pde] = NULL; |
314 | } |
315 | |
316 | ppgtt->scan_for_unused_pt = false; |
317 | } |
318 | |
319 | static const struct i915_vma_ops pd_vma_ops = { |
320 | .bind_vma = pd_vma_bind, |
321 | .unbind_vma = pd_vma_unbind, |
322 | }; |
323 | |
324 | int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww) |
325 | { |
326 | struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); |
327 | int err; |
328 | |
329 | GEM_BUG_ON(!kref_read(&ppgtt->base.vm.ref)); |
330 | |
331 | /* |
332 | * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt |
333 | * which will be pinned into every active context. |
334 | * (When vma->pin_count becomes atomic, I expect we will naturally |
335 | * need a larger, unpacked, type and kill this redundancy.) |
336 | */ |
337 | if (atomic_add_unless(v: &ppgtt->pin_count, a: 1, u: 0)) |
338 | return 0; |
339 | |
340 | /* grab the ppgtt resv to pin the object */ |
341 | err = i915_vm_lock_objects(vm: &ppgtt->base.vm, ww); |
342 | if (err) |
343 | return err; |
344 | |
345 | /* |
346 | * PPGTT PDEs reside in the GGTT and consists of 512 entries. The |
347 | * allocator works in address space sizes, so it's multiplied by page |
348 | * size. We allocate at the top of the GTT to avoid fragmentation. |
349 | */ |
350 | if (!atomic_read(v: &ppgtt->pin_count)) { |
351 | err = i915_ggtt_pin(vma: ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH); |
352 | |
353 | GEM_BUG_ON(ppgtt->vma->fence); |
354 | clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(ppgtt->vma)); |
355 | } |
356 | if (!err) |
357 | atomic_inc(v: &ppgtt->pin_count); |
358 | |
359 | return err; |
360 | } |
361 | |
362 | static int pd_dummy_obj_get_pages(struct drm_i915_gem_object *obj) |
363 | { |
364 | obj->mm.pages = ZERO_SIZE_PTR; |
365 | return 0; |
366 | } |
367 | |
/* put_pages hook for the dummy page-directory object: intentionally empty. */
static void pd_dummy_obj_put_pages(struct drm_i915_gem_object *obj,
				   struct sg_table *pages)
{
	/* pd_dummy_obj_get_pages() allocated nothing, so nothing to release */
}
372 | |
373 | static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = { |
374 | .name = "pd_dummy_obj" , |
375 | .get_pages = pd_dummy_obj_get_pages, |
376 | .put_pages = pd_dummy_obj_put_pages, |
377 | }; |
378 | |
379 | static struct i915_page_directory * |
380 | gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) |
381 | { |
382 | struct i915_ggtt * const ggtt = ppgtt->base.vm.gt->ggtt; |
383 | struct i915_page_directory *pd; |
384 | int err; |
385 | |
386 | pd = __alloc_pd(I915_PDES); |
387 | if (unlikely(!pd)) |
388 | return ERR_PTR(error: -ENOMEM); |
389 | |
390 | pd->pt.base = __i915_gem_object_create_internal(i915: ppgtt->base.vm.gt->i915, |
391 | ops: &pd_dummy_obj_ops, |
392 | I915_PDES * SZ_4K); |
393 | if (IS_ERR(ptr: pd->pt.base)) { |
394 | err = PTR_ERR(ptr: pd->pt.base); |
395 | pd->pt.base = NULL; |
396 | goto err_pd; |
397 | } |
398 | |
399 | pd->pt.base->base.resv = i915_vm_resv_get(vm: &ppgtt->base.vm); |
400 | pd->pt.base->shares_resv_from = &ppgtt->base.vm; |
401 | |
402 | ppgtt->vma = i915_vma_instance(obj: pd->pt.base, vm: &ggtt->vm, NULL); |
403 | if (IS_ERR(ptr: ppgtt->vma)) { |
404 | err = PTR_ERR(ptr: ppgtt->vma); |
405 | ppgtt->vma = NULL; |
406 | goto err_pd; |
407 | } |
408 | |
409 | /* The dummy object we create is special, override ops.. */ |
410 | ppgtt->vma->ops = &pd_vma_ops; |
411 | ppgtt->vma->private = ppgtt; |
412 | return pd; |
413 | |
414 | err_pd: |
415 | free_pd(&ppgtt->base.vm, pd); |
416 | return ERR_PTR(error: err); |
417 | } |
418 | |
419 | void gen6_ppgtt_unpin(struct i915_ppgtt *base) |
420 | { |
421 | struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); |
422 | |
423 | GEM_BUG_ON(!atomic_read(&ppgtt->pin_count)); |
424 | if (atomic_dec_and_test(v: &ppgtt->pin_count)) |
425 | i915_vma_unpin(vma: ppgtt->vma); |
426 | } |
427 | |
428 | struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) |
429 | { |
430 | struct i915_ggtt * const ggtt = gt->ggtt; |
431 | struct gen6_ppgtt *ppgtt; |
432 | int err; |
433 | |
434 | ppgtt = kzalloc(size: sizeof(*ppgtt), GFP_KERNEL); |
435 | if (!ppgtt) |
436 | return ERR_PTR(error: -ENOMEM); |
437 | |
438 | mutex_init(&ppgtt->flush); |
439 | |
440 | ppgtt_init(ppgtt: &ppgtt->base, gt, lmem_pt_obj_flags: 0); |
441 | ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t)); |
442 | ppgtt->base.vm.top = 1; |
443 | |
444 | ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND; |
445 | ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range; |
446 | ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range; |
447 | ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries; |
448 | ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup; |
449 | |
450 | ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma; |
451 | ppgtt->base.vm.alloc_scratch_dma = alloc_pt_dma; |
452 | ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; |
453 | |
454 | err = gen6_ppgtt_init_scratch(ppgtt); |
455 | if (err) |
456 | goto err_put; |
457 | |
458 | ppgtt->base.pd = gen6_alloc_top_pd(ppgtt); |
459 | if (IS_ERR(ptr: ppgtt->base.pd)) { |
460 | err = PTR_ERR(ptr: ppgtt->base.pd); |
461 | goto err_put; |
462 | } |
463 | |
464 | return &ppgtt->base; |
465 | |
466 | err_put: |
467 | i915_vm_put(vm: &ppgtt->base.vm); |
468 | return ERR_PTR(error: err); |
469 | } |
470 | |