1 | // SPDX-License-Identifier: MIT |
2 | /* |
3 | * Copyright © 2020 Intel Corporation |
4 | */ |
5 | |
6 | #include <linux/slab.h> |
7 | |
8 | #include "gem/i915_gem_lmem.h" |
9 | |
10 | #include "i915_trace.h" |
11 | #include "intel_gt.h" |
12 | #include "intel_gtt.h" |
13 | #include "gen6_ppgtt.h" |
14 | #include "gen8_ppgtt.h" |
15 | |
16 | struct i915_page_table *alloc_pt(struct i915_address_space *vm, int sz) |
17 | { |
18 | struct i915_page_table *pt; |
19 | |
20 | pt = kmalloc(size: sizeof(*pt), I915_GFP_ALLOW_FAIL); |
21 | if (unlikely(!pt)) |
22 | return ERR_PTR(error: -ENOMEM); |
23 | |
24 | pt->base = vm->alloc_pt_dma(vm, sz); |
25 | if (IS_ERR(ptr: pt->base)) { |
26 | kfree(objp: pt); |
27 | return ERR_PTR(error: -ENOMEM); |
28 | } |
29 | |
30 | pt->is_compact = false; |
31 | atomic_set(v: &pt->used, i: 0); |
32 | return pt; |
33 | } |
34 | |
35 | struct i915_page_directory *__alloc_pd(int count) |
36 | { |
37 | struct i915_page_directory *pd; |
38 | |
39 | pd = kzalloc(size: sizeof(*pd), I915_GFP_ALLOW_FAIL); |
40 | if (unlikely(!pd)) |
41 | return NULL; |
42 | |
43 | pd->entry = kcalloc(n: count, size: sizeof(*pd->entry), I915_GFP_ALLOW_FAIL); |
44 | if (unlikely(!pd->entry)) { |
45 | kfree(objp: pd); |
46 | return NULL; |
47 | } |
48 | |
49 | spin_lock_init(&pd->lock); |
50 | return pd; |
51 | } |
52 | |
53 | struct i915_page_directory *alloc_pd(struct i915_address_space *vm) |
54 | { |
55 | struct i915_page_directory *pd; |
56 | |
57 | pd = __alloc_pd(I915_PDES); |
58 | if (unlikely(!pd)) |
59 | return ERR_PTR(error: -ENOMEM); |
60 | |
61 | pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); |
62 | if (IS_ERR(ptr: pd->pt.base)) { |
63 | kfree(objp: pd->entry); |
64 | kfree(objp: pd); |
65 | return ERR_PTR(error: -ENOMEM); |
66 | } |
67 | |
68 | return pd; |
69 | } |
70 | |
71 | void free_px(struct i915_address_space *vm, struct i915_page_table *pt, int lvl) |
72 | { |
73 | BUILD_BUG_ON(offsetof(struct i915_page_directory, pt)); |
74 | |
75 | if (lvl) { |
76 | struct i915_page_directory *pd = |
77 | container_of(pt, typeof(*pd), pt); |
78 | kfree(objp: pd->entry); |
79 | } |
80 | |
81 | if (pt->base) |
82 | i915_gem_object_put(obj: pt->base); |
83 | |
84 | kfree(objp: pt); |
85 | } |
86 | |
87 | static void |
88 | write_dma_entry(struct drm_i915_gem_object * const pdma, |
89 | const unsigned short idx, |
90 | const u64 encoded_entry) |
91 | { |
92 | u64 * const vaddr = __px_vaddr(p: pdma); |
93 | |
94 | vaddr[idx] = encoded_entry; |
95 | drm_clflush_virt_range(addr: &vaddr[idx], length: sizeof(u64)); |
96 | } |
97 | |
98 | void |
99 | __set_pd_entry(struct i915_page_directory * const pd, |
100 | const unsigned short idx, |
101 | struct i915_page_table * const to, |
102 | u64 (*encode)(const dma_addr_t, const enum i915_cache_level)) |
103 | { |
104 | /* Each thread pre-pins the pd, and we may have a thread per pde. */ |
105 | GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * I915_PDES); |
106 | |
107 | atomic_inc(px_used(pd)); |
108 | pd->entry[idx] = to; |
109 | write_dma_entry(px_base(pd), idx, encoded_entry: encode(px_dma(to), I915_CACHE_LLC)); |
110 | } |
111 | |
112 | void |
113 | clear_pd_entry(struct i915_page_directory * const pd, |
114 | const unsigned short idx, |
115 | const struct drm_i915_gem_object * const scratch) |
116 | { |
117 | GEM_BUG_ON(atomic_read(px_used(pd)) == 0); |
118 | |
119 | write_dma_entry(px_base(pd), idx, encoded_entry: scratch->encode); |
120 | pd->entry[idx] = NULL; |
121 | atomic_dec(px_used(pd)); |
122 | } |
123 | |
124 | bool |
125 | release_pd_entry(struct i915_page_directory * const pd, |
126 | const unsigned short idx, |
127 | struct i915_page_table * const pt, |
128 | const struct drm_i915_gem_object * const scratch) |
129 | { |
130 | bool free = false; |
131 | |
132 | if (atomic_add_unless(v: &pt->used, a: -1, u: 1)) |
133 | return false; |
134 | |
135 | spin_lock(lock: &pd->lock); |
136 | if (atomic_dec_and_test(v: &pt->used)) { |
137 | clear_pd_entry(pd, idx, scratch); |
138 | free = true; |
139 | } |
140 | spin_unlock(lock: &pd->lock); |
141 | |
142 | return free; |
143 | } |
144 | |
145 | int i915_ppgtt_init_hw(struct intel_gt *gt) |
146 | { |
147 | struct drm_i915_private *i915 = gt->i915; |
148 | |
149 | gtt_write_workarounds(gt); |
150 | |
151 | if (GRAPHICS_VER(i915) == 6) |
152 | gen6_ppgtt_enable(gt); |
153 | else if (GRAPHICS_VER(i915) == 7) |
154 | gen7_ppgtt_enable(gt); |
155 | |
156 | return 0; |
157 | } |
158 | |
159 | static struct i915_ppgtt * |
160 | __ppgtt_create(struct intel_gt *gt, unsigned long lmem_pt_obj_flags) |
161 | { |
162 | if (GRAPHICS_VER(gt->i915) < 8) |
163 | return gen6_ppgtt_create(gt); |
164 | else |
165 | return gen8_ppgtt_create(gt, lmem_pt_obj_flags); |
166 | } |
167 | |
168 | struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt, |
169 | unsigned long lmem_pt_obj_flags) |
170 | { |
171 | struct i915_ppgtt *ppgtt; |
172 | |
173 | ppgtt = __ppgtt_create(gt, lmem_pt_obj_flags); |
174 | if (IS_ERR(ptr: ppgtt)) |
175 | return ppgtt; |
176 | |
177 | trace_i915_ppgtt_create(vm: &ppgtt->vm); |
178 | |
179 | return ppgtt; |
180 | } |
181 | |
182 | void ppgtt_bind_vma(struct i915_address_space *vm, |
183 | struct i915_vm_pt_stash *stash, |
184 | struct i915_vma_resource *vma_res, |
185 | unsigned int pat_index, |
186 | u32 flags) |
187 | { |
188 | u32 pte_flags; |
189 | |
190 | if (!vma_res->allocated) { |
191 | vm->allocate_va_range(vm, stash, vma_res->start, |
192 | vma_res->vma_size); |
193 | vma_res->allocated = true; |
194 | } |
195 | |
196 | /* Applicable to VLV, and gen8+ */ |
197 | pte_flags = 0; |
198 | if (vma_res->bi.readonly) |
199 | pte_flags |= PTE_READ_ONLY; |
200 | if (vma_res->bi.lmem) |
201 | pte_flags |= PTE_LM; |
202 | |
203 | vm->insert_entries(vm, vma_res, pat_index, pte_flags); |
204 | wmb(); |
205 | } |
206 | |
207 | void ppgtt_unbind_vma(struct i915_address_space *vm, |
208 | struct i915_vma_resource *vma_res) |
209 | { |
210 | if (!vma_res->allocated) |
211 | return; |
212 | |
213 | vm->clear_range(vm, vma_res->start, vma_res->vma_size); |
214 | vma_invalidate_tlb(vm, tlb: vma_res->tlb); |
215 | } |
216 | |
217 | static unsigned long pd_count(u64 size, int shift) |
218 | { |
219 | /* Beware later misalignment */ |
220 | return (size + 2 * (BIT_ULL(shift) - 1)) >> shift; |
221 | } |
222 | |
223 | int i915_vm_alloc_pt_stash(struct i915_address_space *vm, |
224 | struct i915_vm_pt_stash *stash, |
225 | u64 size) |
226 | { |
227 | unsigned long count; |
228 | int shift, n, pt_sz; |
229 | |
230 | shift = vm->pd_shift; |
231 | if (!shift) |
232 | return 0; |
233 | |
234 | pt_sz = stash->pt_sz; |
235 | if (!pt_sz) |
236 | pt_sz = I915_GTT_PAGE_SIZE_4K; |
237 | else |
238 | GEM_BUG_ON(!IS_DGFX(vm->i915)); |
239 | |
240 | GEM_BUG_ON(!is_power_of_2(pt_sz)); |
241 | |
242 | count = pd_count(size, shift); |
243 | while (count--) { |
244 | struct i915_page_table *pt; |
245 | |
246 | pt = alloc_pt(vm, sz: pt_sz); |
247 | if (IS_ERR(ptr: pt)) { |
248 | i915_vm_free_pt_stash(vm, stash); |
249 | return PTR_ERR(ptr: pt); |
250 | } |
251 | |
252 | pt->stash = stash->pt[0]; |
253 | stash->pt[0] = pt; |
254 | } |
255 | |
256 | for (n = 1; n < vm->top; n++) { |
257 | shift += ilog2(I915_PDES); /* Each PD holds 512 entries */ |
258 | count = pd_count(size, shift); |
259 | while (count--) { |
260 | struct i915_page_directory *pd; |
261 | |
262 | pd = alloc_pd(vm); |
263 | if (IS_ERR(ptr: pd)) { |
264 | i915_vm_free_pt_stash(vm, stash); |
265 | return PTR_ERR(ptr: pd); |
266 | } |
267 | |
268 | pd->pt.stash = stash->pt[1]; |
269 | stash->pt[1] = &pd->pt; |
270 | } |
271 | } |
272 | |
273 | return 0; |
274 | } |
275 | |
276 | int i915_vm_map_pt_stash(struct i915_address_space *vm, |
277 | struct i915_vm_pt_stash *stash) |
278 | { |
279 | struct i915_page_table *pt; |
280 | int n, err; |
281 | |
282 | for (n = 0; n < ARRAY_SIZE(stash->pt); n++) { |
283 | for (pt = stash->pt[n]; pt; pt = pt->stash) { |
284 | err = map_pt_dma_locked(vm, obj: pt->base); |
285 | if (err) |
286 | return err; |
287 | } |
288 | } |
289 | |
290 | return 0; |
291 | } |
292 | |
293 | void i915_vm_free_pt_stash(struct i915_address_space *vm, |
294 | struct i915_vm_pt_stash *stash) |
295 | { |
296 | struct i915_page_table *pt; |
297 | int n; |
298 | |
299 | for (n = 0; n < ARRAY_SIZE(stash->pt); n++) { |
300 | while ((pt = stash->pt[n])) { |
301 | stash->pt[n] = pt->stash; |
302 | free_px(vm, pt, lvl: n); |
303 | } |
304 | } |
305 | } |
306 | |
307 | void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt, |
308 | unsigned long lmem_pt_obj_flags) |
309 | { |
310 | struct drm_i915_private *i915 = gt->i915; |
311 | |
312 | ppgtt->vm.gt = gt; |
313 | ppgtt->vm.i915 = i915; |
314 | ppgtt->vm.dma = i915->drm.dev; |
315 | ppgtt->vm.total = BIT_ULL(RUNTIME_INFO(i915)->ppgtt_size); |
316 | ppgtt->vm.lmem_pt_obj_flags = lmem_pt_obj_flags; |
317 | |
318 | dma_resv_init(obj: &ppgtt->vm._resv); |
319 | i915_address_space_init(vm: &ppgtt->vm, VM_CLASS_PPGTT); |
320 | |
321 | ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma; |
322 | ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma; |
323 | } |
324 | |