1 | // SPDX-License-Identifier: MIT |
2 | /* |
3 | * Copyright © 2021 Intel Corporation |
4 | */ |
5 | |
6 | #include <linux/interval_tree_generic.h> |
7 | #include <linux/sched/mm.h> |
8 | |
9 | #include "i915_sw_fence.h" |
10 | #include "i915_vma_resource.h" |
11 | #include "i915_drv.h" |
12 | #include "intel_memory_region.h" |
13 | |
14 | #include "gt/intel_gtt.h" |
15 | |
/* Slab cache backing all struct i915_vma_resource allocations. */
static struct kmem_cache *slab_vma_resources;
17 | |
18 | /** |
19 | * DOC: |
20 | * We use a per-vm interval tree to keep track of vma_resources |
21 | * scheduled for unbind but not yet unbound. The tree is protected by |
22 | * the vm mutex, and nodes are removed just after the unbind fence signals. |
23 | * The removal takes the vm mutex from a kernel thread which we need to |
24 | * keep in mind so that we don't grab the mutex and try to wait for all |
 * pending unbinds to complete, because that will temporarily block many
26 | * of the workqueue threads, and people will get angry. |
27 | * |
28 | * We should consider using a single ordered fence per VM instead but that |
29 | * requires ordering the unbinds and might introduce unnecessary waiting |
30 | * for unrelated unbinds. Amount of code will probably be roughly the same |
31 | * due to the simplicity of using the interval tree interface. |
32 | * |
33 | * Another drawback of this interval tree is that the complexity of insertion |
34 | * and removal of fences increases as O(ln(pending_unbinds)) instead of |
35 | * O(1) for a single fence without interval tree. |
36 | */ |
/*
 * Interval bounds for the pending-unbind tree. The interval of a
 * vma_resource extends over its guard area on both sides (start - guard
 * through start + node_size + guard - 1), so lookups also hit resources
 * whose guard pages overlap the queried range.
 */
#define VMA_RES_START(_node) ((_node)->start - (_node)->guard)
#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size + (_node)->guard - 1)
INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb,
		     u64, __subtree_last,
		     VMA_RES_START, VMA_RES_LAST, static, vma_res_itree);
42 | |
43 | /* Callbacks for the unbind dma-fence. */ |
44 | |
45 | /** |
46 | * i915_vma_resource_alloc - Allocate a vma resource |
47 | * |
48 | * Return: A pointer to a cleared struct i915_vma_resource or |
49 | * a -ENOMEM error pointer if allocation fails. |
50 | */ |
51 | struct i915_vma_resource *i915_vma_resource_alloc(void) |
52 | { |
53 | struct i915_vma_resource *vma_res = |
54 | kmem_cache_zalloc(k: slab_vma_resources, GFP_KERNEL); |
55 | |
56 | return vma_res ? vma_res : ERR_PTR(error: -ENOMEM); |
57 | } |
58 | |
59 | /** |
60 | * i915_vma_resource_free - Free a vma resource |
61 | * @vma_res: The vma resource to free. |
62 | */ |
63 | void i915_vma_resource_free(struct i915_vma_resource *vma_res) |
64 | { |
65 | if (vma_res) |
66 | kmem_cache_free(s: slab_vma_resources, objp: vma_res); |
67 | } |
68 | |
/* dma_fence_ops callback: static driver name for the unbind fence. */
static const char *get_driver_name(struct dma_fence *fence)
{
	return "vma unbind fence";
}
73 | |
/* dma_fence_ops callback: static timeline name for the unbind fence. */
static const char *get_timeline_name(struct dma_fence *fence)
{
	return "unbound";
}
78 | |
79 | static void unbind_fence_free_rcu(struct rcu_head *head) |
80 | { |
81 | struct i915_vma_resource *vma_res = |
82 | container_of(head, typeof(*vma_res), unbind_fence.rcu); |
83 | |
84 | i915_vma_resource_free(vma_res); |
85 | } |
86 | |
87 | static void unbind_fence_release(struct dma_fence *fence) |
88 | { |
89 | struct i915_vma_resource *vma_res = |
90 | container_of(fence, typeof(*vma_res), unbind_fence); |
91 | |
92 | i915_sw_fence_fini(fence: &vma_res->chain); |
93 | |
94 | call_rcu(head: &fence->rcu, func: unbind_fence_free_rcu); |
95 | } |
96 | |
/* Ops for the unbind dma-fence embedded in struct i915_vma_resource. */
static const struct dma_fence_ops unbind_fence_ops = {
	.get_driver_name = get_driver_name,
	.get_timeline_name = get_timeline_name,
	.release = unbind_fence_release,
};
102 | |
103 | static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res) |
104 | { |
105 | struct i915_address_space *vm; |
106 | |
107 | if (!refcount_dec_and_test(r: &vma_res->hold_count)) |
108 | return; |
109 | |
110 | dma_fence_signal(fence: &vma_res->unbind_fence); |
111 | |
112 | vm = vma_res->vm; |
113 | if (vma_res->wakeref) |
114 | intel_runtime_pm_put(rpm: &vm->i915->runtime_pm, wref: vma_res->wakeref); |
115 | |
116 | vma_res->vm = NULL; |
117 | if (!RB_EMPTY_NODE(&vma_res->rb)) { |
118 | mutex_lock(&vm->mutex); |
119 | vma_res_itree_remove(node: vma_res, root: &vm->pending_unbind); |
120 | mutex_unlock(lock: &vm->mutex); |
121 | } |
122 | |
123 | if (vma_res->bi.pages_rsgt) |
124 | i915_refct_sgt_put(rsgt: vma_res->bi.pages_rsgt); |
125 | } |
126 | |
127 | /** |
128 | * i915_vma_resource_unhold - Unhold the signaling of the vma resource unbind |
129 | * fence. |
130 | * @vma_res: The vma resource. |
131 | * @lockdep_cookie: The lockdep cookie returned from i915_vma_resource_hold. |
132 | * |
133 | * The function may leave a dma_fence critical section. |
134 | */ |
135 | void i915_vma_resource_unhold(struct i915_vma_resource *vma_res, |
136 | bool lockdep_cookie) |
137 | { |
138 | dma_fence_end_signalling(cookie: lockdep_cookie); |
139 | |
140 | if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { |
141 | unsigned long irq_flags; |
142 | |
143 | /* Inefficient open-coded might_lock_irqsave() */ |
144 | spin_lock_irqsave(&vma_res->lock, irq_flags); |
145 | spin_unlock_irqrestore(lock: &vma_res->lock, flags: irq_flags); |
146 | } |
147 | |
148 | __i915_vma_resource_unhold(vma_res); |
149 | } |
150 | |
151 | /** |
152 | * i915_vma_resource_hold - Hold the signaling of the vma resource unbind fence. |
153 | * @vma_res: The vma resource. |
 * @lockdep_cookie: Pointer to a bool serving as a lockdep cookie that should
155 | * be given as an argument to the pairing i915_vma_resource_unhold. |
156 | * |
157 | * If returning true, the function enters a dma_fence signalling critical |
158 | * section if not in one already. |
159 | * |
160 | * Return: true if holding successful, false if not. |
161 | */ |
162 | bool i915_vma_resource_hold(struct i915_vma_resource *vma_res, |
163 | bool *lockdep_cookie) |
164 | { |
165 | bool held = refcount_inc_not_zero(r: &vma_res->hold_count); |
166 | |
167 | if (held) |
168 | *lockdep_cookie = dma_fence_begin_signalling(); |
169 | |
170 | return held; |
171 | } |
172 | |
173 | static void i915_vma_resource_unbind_work(struct work_struct *work) |
174 | { |
175 | struct i915_vma_resource *vma_res = |
176 | container_of(work, typeof(*vma_res), work); |
177 | struct i915_address_space *vm = vma_res->vm; |
178 | bool lockdep_cookie; |
179 | |
180 | lockdep_cookie = dma_fence_begin_signalling(); |
181 | if (likely(!vma_res->skip_pte_rewrite)) |
182 | vma_res->ops->unbind_vma(vm, vma_res); |
183 | |
184 | dma_fence_end_signalling(cookie: lockdep_cookie); |
185 | __i915_vma_resource_unhold(vma_res); |
186 | i915_vma_resource_put(vma_res); |
187 | } |
188 | |
189 | static int |
190 | i915_vma_resource_fence_notify(struct i915_sw_fence *fence, |
191 | enum i915_sw_fence_notify state) |
192 | { |
193 | struct i915_vma_resource *vma_res = |
194 | container_of(fence, typeof(*vma_res), chain); |
195 | struct dma_fence *unbind_fence = |
196 | &vma_res->unbind_fence; |
197 | |
198 | switch (state) { |
199 | case FENCE_COMPLETE: |
200 | dma_fence_get(fence: unbind_fence); |
201 | if (vma_res->immediate_unbind) { |
202 | i915_vma_resource_unbind_work(work: &vma_res->work); |
203 | } else { |
204 | INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work); |
205 | queue_work(wq: system_unbound_wq, work: &vma_res->work); |
206 | } |
207 | break; |
208 | case FENCE_FREE: |
209 | i915_vma_resource_put(vma_res); |
210 | break; |
211 | } |
212 | |
213 | return NOTIFY_DONE; |
214 | } |
215 | |
216 | /** |
217 | * i915_vma_resource_unbind - Unbind a vma resource |
218 | * @vma_res: The vma resource to unbind. |
219 | * @tlb: pointer to vma->obj->mm.tlb associated with the resource |
220 | * to be stored at vma_res->tlb. When not-NULL, it will be used |
221 | * to do TLB cache invalidation before freeing a VMA resource. |
222 | * Used only for async unbind. |
223 | * |
224 | * At this point this function does little more than publish a fence that |
225 | * signals immediately unless signaling is held back. |
226 | * |
227 | * Return: A refcounted pointer to a dma-fence that signals when unbinding is |
228 | * complete. |
229 | */ |
230 | struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res, |
231 | u32 *tlb) |
232 | { |
233 | struct i915_address_space *vm = vma_res->vm; |
234 | |
235 | vma_res->tlb = tlb; |
236 | |
237 | /* Reference for the sw fence */ |
238 | i915_vma_resource_get(vma_res); |
239 | |
240 | /* Caller must already have a wakeref in this case. */ |
241 | if (vma_res->needs_wakeref) |
242 | vma_res->wakeref = intel_runtime_pm_get_if_in_use(rpm: &vm->i915->runtime_pm); |
243 | |
244 | if (atomic_read(v: &vma_res->chain.pending) <= 1) { |
245 | RB_CLEAR_NODE(&vma_res->rb); |
246 | vma_res->immediate_unbind = 1; |
247 | } else { |
248 | vma_res_itree_insert(node: vma_res, root: &vma_res->vm->pending_unbind); |
249 | } |
250 | |
251 | i915_sw_fence_commit(fence: &vma_res->chain); |
252 | |
253 | return &vma_res->unbind_fence; |
254 | } |
255 | |
256 | /** |
257 | * __i915_vma_resource_init - Initialize a vma resource. |
258 | * @vma_res: The vma resource to initialize |
259 | * |
260 | * Initializes the private members of a vma resource. |
261 | */ |
262 | void __i915_vma_resource_init(struct i915_vma_resource *vma_res) |
263 | { |
264 | spin_lock_init(&vma_res->lock); |
265 | dma_fence_init(fence: &vma_res->unbind_fence, ops: &unbind_fence_ops, |
266 | lock: &vma_res->lock, context: 0, seqno: 0); |
267 | refcount_set(r: &vma_res->hold_count, n: 1); |
268 | i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify); |
269 | } |
270 | |
271 | static void |
272 | i915_vma_resource_color_adjust_range(struct i915_address_space *vm, |
273 | u64 *start, |
274 | u64 *end) |
275 | { |
276 | if (i915_vm_has_cache_coloring(vm)) { |
277 | if (*start) |
278 | *start -= I915_GTT_PAGE_SIZE; |
279 | *end += I915_GTT_PAGE_SIZE; |
280 | } |
281 | } |
282 | |
283 | /** |
284 | * i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a |
285 | * certain vm range. |
286 | * @vm: The vm to look at. |
287 | * @offset: The range start. |
288 | * @size: The range size. |
 * @intr: Whether to wait interruptible.
290 | * |
291 | * The function needs to be called with the vm lock held. |
292 | * |
293 | * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true |
294 | */ |
295 | int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm, |
296 | u64 offset, |
297 | u64 size, |
298 | bool intr) |
299 | { |
300 | struct i915_vma_resource *node; |
301 | u64 last = offset + size - 1; |
302 | |
303 | lockdep_assert_held(&vm->mutex); |
304 | might_sleep(); |
305 | |
306 | i915_vma_resource_color_adjust_range(vm, start: &offset, end: &last); |
307 | node = vma_res_itree_iter_first(root: &vm->pending_unbind, start: offset, last); |
308 | while (node) { |
309 | int ret = dma_fence_wait(fence: &node->unbind_fence, intr); |
310 | |
311 | if (ret) |
312 | return ret; |
313 | |
314 | node = vma_res_itree_iter_next(node, start: offset, last); |
315 | } |
316 | |
317 | return 0; |
318 | } |
319 | |
320 | /** |
321 | * i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm, |
322 | * releasing the vm lock while waiting. |
323 | * @vm: The vm to look at. |
324 | * |
325 | * The function may not be called with the vm lock held. |
326 | * Typically this is called at vm destruction to finish any pending |
327 | * unbind operations. The vm mutex is released while waiting to avoid |
328 | * stalling kernel workqueues trying to grab the mutex. |
329 | */ |
330 | void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm) |
331 | { |
332 | struct i915_vma_resource *node; |
333 | struct dma_fence *fence; |
334 | |
335 | do { |
336 | fence = NULL; |
337 | mutex_lock(&vm->mutex); |
338 | node = vma_res_itree_iter_first(root: &vm->pending_unbind, start: 0, |
339 | U64_MAX); |
340 | if (node) |
341 | fence = dma_fence_get_rcu(fence: &node->unbind_fence); |
342 | mutex_unlock(lock: &vm->mutex); |
343 | |
344 | if (fence) { |
345 | /* |
346 | * The wait makes sure the node eventually removes |
347 | * itself from the tree. |
348 | */ |
349 | dma_fence_wait(fence, intr: false); |
350 | dma_fence_put(fence); |
351 | } |
352 | } while (node); |
353 | } |
354 | |
355 | /** |
356 | * i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all |
357 | * pending unbinds in a certain range of a vm. |
358 | * @vm: The vm to look at. |
359 | * @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds. |
360 | * @offset: The range start. |
361 | * @size: The range size. |
 * @intr: Whether to wait interruptible.
363 | * @gfp: Allocation mode for memory allocations. |
364 | * |
365 | * The function makes @sw_fence await all pending unbinds in a certain |
366 | * vm range before calling the complete notifier. To be able to await |
367 | * each individual unbind, the function needs to allocate memory using |
368 | * the @gpf allocation mode. If that fails, the function will instead |
369 | * wait for the unbind fence to signal, using @intr to judge whether to |
370 | * wait interruptible or not. Note that @gfp should ideally be selected so |
371 | * as to avoid any expensive memory allocation stalls and rather fail and |
372 | * synchronize itself. For now the vm mutex is required when calling this |
 * function, which means that @gfp can't call into direct reclaim. In reality
374 | * this means that during heavy memory pressure, we will sync in this |
375 | * function. |
376 | * |
377 | * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true |
378 | */ |
379 | int i915_vma_resource_bind_dep_await(struct i915_address_space *vm, |
380 | struct i915_sw_fence *sw_fence, |
381 | u64 offset, |
382 | u64 size, |
383 | bool intr, |
384 | gfp_t gfp) |
385 | { |
386 | struct i915_vma_resource *node; |
387 | u64 last = offset + size - 1; |
388 | |
389 | lockdep_assert_held(&vm->mutex); |
390 | might_alloc(gfp_mask: gfp); |
391 | might_sleep(); |
392 | |
393 | i915_vma_resource_color_adjust_range(vm, start: &offset, end: &last); |
394 | node = vma_res_itree_iter_first(root: &vm->pending_unbind, start: offset, last); |
395 | while (node) { |
396 | int ret; |
397 | |
398 | ret = i915_sw_fence_await_dma_fence(fence: sw_fence, |
399 | dma: &node->unbind_fence, |
400 | timeout: 0, gfp); |
401 | if (ret < 0) { |
402 | ret = dma_fence_wait(fence: &node->unbind_fence, intr); |
403 | if (ret) |
404 | return ret; |
405 | } |
406 | |
407 | node = vma_res_itree_iter_next(node, start: offset, last); |
408 | } |
409 | |
410 | return 0; |
411 | } |
412 | |
413 | void i915_vma_resource_module_exit(void) |
414 | { |
415 | kmem_cache_destroy(s: slab_vma_resources); |
416 | } |
417 | |
418 | int __init i915_vma_resource_module_init(void) |
419 | { |
420 | slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN); |
421 | if (!slab_vma_resources) |
422 | return -ENOMEM; |
423 | |
424 | return 0; |
425 | } |
426 | |