// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include <linux/interval_tree_generic.h>
#include <linux/sched/mm.h>

#include "i915_sw_fence.h"
#include "i915_vma_resource.h"
#include "i915_drv.h"
#include "intel_memory_region.h"

#include "gt/intel_gtt.h"

static struct kmem_cache *slab_vma_resources;

/**
 * DOC:
 * We use a per-vm interval tree to keep track of vma_resources
 * scheduled for unbind but not yet unbound. The tree is protected by
 * the vm mutex, and nodes are removed just after the unbind fence signals.
 * The removal takes the vm mutex from a kernel thread, which we need to
 * keep in mind so that we don't grab the mutex and try to wait for all
 * pending unbinds to complete, because that will temporarily block many
 * of the workqueue threads, and people will get angry.
 *
 * We should consider using a single ordered fence per VM instead, but that
 * requires ordering the unbinds and might introduce unnecessary waiting
 * for unrelated unbinds. The amount of code will probably be roughly the
 * same due to the simplicity of using the interval tree interface.
 *
 * Another drawback of this interval tree is that the complexity of
 * insertion and removal of fences increases as O(log(pending_unbinds))
 * instead of O(1) for a single fence without an interval tree.
 */
#define VMA_RES_START(_node) ((_node)->start - (_node)->guard)
#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size + (_node)->guard - 1)
INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb,
		     u64, __subtree_last,
		     VMA_RES_START, VMA_RES_LAST, static, vma_res_itree);
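
/*
 * INTERVAL_TREE_DEFINE() above generates the static helpers used throughout
 * this file: vma_res_itree_insert(), vma_res_itree_remove(),
 * vma_res_itree_iter_first() and vma_res_itree_iter_next(), all keyed on the
 * [VMA_RES_START(), VMA_RES_LAST()] range covered by each vma resource,
 * including its guard pages.
 */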

/* Callbacks for the unbind dma-fence. */

/**
 * i915_vma_resource_alloc - Allocate a vma resource
 *
 * Return: A pointer to a cleared struct i915_vma_resource or
 * a -ENOMEM error pointer if allocation fails.
 */
struct i915_vma_resource *i915_vma_resource_alloc(void)
{
	struct i915_vma_resource *vma_res =
		kmem_cache_zalloc(slab_vma_resources, GFP_KERNEL);

	return vma_res ? vma_res : ERR_PTR(-ENOMEM);
}

/**
 * i915_vma_resource_free - Free a vma resource
 * @vma_res: The vma resource to free.
 */
void i915_vma_resource_free(struct i915_vma_resource *vma_res)
{
	if (vma_res)
		kmem_cache_free(slab_vma_resources, vma_res);
}

static const char *get_driver_name(struct dma_fence *fence)
{
	return "vma unbind fence";
}

static const char *get_timeline_name(struct dma_fence *fence)
{
	return "unbound";
}

static void unbind_fence_free_rcu(struct rcu_head *head)
{
	struct i915_vma_resource *vma_res =
		container_of(head, typeof(*vma_res), unbind_fence.rcu);

	i915_vma_resource_free(vma_res);
}

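/*
 * Note: dma_fence pointers may be looked up and referenced under RCU, for
 * example via dma_fence_get_rcu(), so the final free of the embedding vma
 * resource is deferred through call_rcu() below.
 */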
static void unbind_fence_release(struct dma_fence *fence)
{
	struct i915_vma_resource *vma_res =
		container_of(fence, typeof(*vma_res), unbind_fence);

	i915_sw_fence_fini(&vma_res->chain);

	call_rcu(&fence->rcu, unbind_fence_free_rcu);
}

static const struct dma_fence_ops unbind_fence_ops = {
	.get_driver_name = get_driver_name,
	.get_timeline_name = get_timeline_name,
	.release = unbind_fence_release,
};

static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res)
{
	struct i915_address_space *vm;

	if (!refcount_dec_and_test(&vma_res->hold_count))
		return;

	dma_fence_signal(&vma_res->unbind_fence);

	vm = vma_res->vm;
	if (vma_res->wakeref)
		intel_runtime_pm_put(&vm->i915->runtime_pm, vma_res->wakeref);

	vma_res->vm = NULL;
	if (!RB_EMPTY_NODE(&vma_res->rb)) {
		mutex_lock(&vm->mutex);
		vma_res_itree_remove(vma_res, &vm->pending_unbind);
		mutex_unlock(&vm->mutex);
	}

	if (vma_res->bi.pages_rsgt)
		i915_refct_sgt_put(vma_res->bi.pages_rsgt);
}

/**
 * i915_vma_resource_unhold - Unhold the signaling of the vma resource unbind
 * fence.
 * @vma_res: The vma resource.
 * @lockdep_cookie: The lockdep cookie returned from i915_vma_resource_hold.
 *
 * The function may leave a dma_fence signalling critical section.
 */
void i915_vma_resource_unhold(struct i915_vma_resource *vma_res,
			      bool lockdep_cookie)
{
	dma_fence_end_signalling(lockdep_cookie);

	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		unsigned long irq_flags;

		/* Inefficient open-coded might_lock_irqsave() */
		spin_lock_irqsave(&vma_res->lock, irq_flags);
		spin_unlock_irqrestore(&vma_res->lock, irq_flags);
	}

	__i915_vma_resource_unhold(vma_res);
}

/**
 * i915_vma_resource_hold - Hold the signaling of the vma resource unbind fence.
 * @vma_res: The vma resource.
 * @lockdep_cookie: Pointer to a bool serving as a lockdep cookie that should
 * be given as an argument to the pairing i915_vma_resource_unhold.
 *
 * If returning true, the function enters a dma_fence signalling critical
 * section if not in one already.
 *
 * Return: true if holding successful, false if not.
 */
bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
			    bool *lockdep_cookie)
{
	bool held = refcount_inc_not_zero(&vma_res->hold_count);

	if (held)
		*lockdep_cookie = dma_fence_begin_signalling();

	return held;
}
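
/*
 * Minimal usage sketch of the hold/unhold pair above (illustrative only):
 *
 *	bool lockdep_cookie;
 *
 *	if (i915_vma_resource_hold(vma_res, &lockdep_cookie)) {
 *		// The unbind fence cannot signal while the hold is active,
 *		// so the backing store may be accessed here.
 *		i915_vma_resource_unhold(vma_res, lockdep_cookie);
 *	}
 */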

static void i915_vma_resource_unbind_work(struct work_struct *work)
{
	struct i915_vma_resource *vma_res =
		container_of(work, typeof(*vma_res), work);
	struct i915_address_space *vm = vma_res->vm;
	bool lockdep_cookie;

	lockdep_cookie = dma_fence_begin_signalling();
	if (likely(!vma_res->skip_pte_rewrite))
		vma_res->ops->unbind_vma(vm, vma_res);

	dma_fence_end_signalling(lockdep_cookie);
	__i915_vma_resource_unhold(vma_res);
	i915_vma_resource_put(vma_res);
}

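/*
 * Notification callback for the chain sw fence. It fires twice:
 * FENCE_COMPLETE once all dependencies have signaled, at which point the
 * actual unbind is either run inline or handed off to a worker, and
 * FENCE_FREE when the sw fence is torn down, dropping the reference taken
 * in i915_vma_resource_unbind().
 */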
static int
i915_vma_resource_fence_notify(struct i915_sw_fence *fence,
			       enum i915_sw_fence_notify state)
{
	struct i915_vma_resource *vma_res =
		container_of(fence, typeof(*vma_res), chain);
	struct dma_fence *unbind_fence =
		&vma_res->unbind_fence;

	switch (state) {
	case FENCE_COMPLETE:
		dma_fence_get(unbind_fence);
		if (vma_res->immediate_unbind) {
			i915_vma_resource_unbind_work(&vma_res->work);
		} else {
			INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work);
			queue_work(system_unbound_wq, &vma_res->work);
		}
		break;
	case FENCE_FREE:
		i915_vma_resource_put(vma_res);
		break;
	}

	return NOTIFY_DONE;
}

/**
 * i915_vma_resource_unbind - Unbind a vma resource
 * @vma_res: The vma resource to unbind.
 * @tlb: pointer to vma->obj->mm.tlb associated with the resource
 * to be stored at vma_res->tlb. When non-NULL, it will be used
 * to do TLB cache invalidation before freeing a VMA resource.
 * Used only for async unbind.
 *
 * At this point this function does little more than publish a fence that
 * signals immediately unless signaling is held back.
 *
 * Return: A refcounted pointer to a dma-fence that signals when unbinding is
 * complete.
 */
struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res,
					   u32 *tlb)
{
	struct i915_address_space *vm = vma_res->vm;

	vma_res->tlb = tlb;

	/* Reference for the sw fence */
	i915_vma_resource_get(vma_res);

	/* Caller must already have a wakeref in this case. */
	if (vma_res->needs_wakeref)
		vma_res->wakeref = intel_runtime_pm_get_if_in_use(&vm->i915->runtime_pm);

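	/*
	 * If the chain sw fence has no external dependencies left, the
	 * unbind can run synchronously from the commit below, so there is
	 * no need to publish it in the pending_unbind tree. Otherwise,
	 * insert it so that later binds overlapping this range can find
	 * and await the unbind fence.
	 */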
	if (atomic_read(&vma_res->chain.pending) <= 1) {
		RB_CLEAR_NODE(&vma_res->rb);
		vma_res->immediate_unbind = 1;
	} else {
		vma_res_itree_insert(vma_res, &vma_res->vm->pending_unbind);
	}

	i915_sw_fence_commit(&vma_res->chain);

	return &vma_res->unbind_fence;
}

/**
 * __i915_vma_resource_init - Initialize a vma resource.
 * @vma_res: The vma resource to initialize
 *
 * Initializes the private members of a vma resource.
 */
void __i915_vma_resource_init(struct i915_vma_resource *vma_res)
{
	spin_lock_init(&vma_res->lock);
	dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops,
		       &vma_res->lock, 0, 0);
	refcount_set(&vma_res->hold_count, 1);
	i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify);
}

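/*
 * With GTT cache coloring, nodes of different colors must be separated by
 * at least one guard page, so a bind may also conflict with unbinds of the
 * immediately adjacent pages. Widen the lookup range by one GTT page on
 * each side to catch such pending unbinds.
 */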
static void
i915_vma_resource_color_adjust_range(struct i915_address_space *vm,
				     u64 *start,
				     u64 *end)
{
	if (i915_vm_has_cache_coloring(vm)) {
		if (*start)
			*start -= I915_GTT_PAGE_SIZE;
		*end += I915_GTT_PAGE_SIZE;
	}
}

/**
 * i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a
 * certain vm range.
 * @vm: The vm to look at.
 * @offset: The range start.
 * @size: The range size.
 * @intr: Whether to wait interruptibly.
 *
 * The function needs to be called with the vm lock held.
 *
 * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
 */
int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm,
				    u64 offset,
				    u64 size,
				    bool intr)
{
	struct i915_vma_resource *node;
	u64 last = offset + size - 1;

	lockdep_assert_held(&vm->mutex);
	might_sleep();

	i915_vma_resource_color_adjust_range(vm, &offset, &last);
	node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
	while (node) {
		int ret = dma_fence_wait(&node->unbind_fence, intr);

		if (ret)
			return ret;

		node = vma_res_itree_iter_next(node, offset, last);
	}

	return 0;
}

/**
 * i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm,
 * releasing the vm lock while waiting.
 * @vm: The vm to look at.
 *
 * The function may not be called with the vm lock held.
 * Typically this is called at vm destruction to finish any pending
 * unbind operations. The vm mutex is released while waiting to avoid
 * stalling kernel workqueues trying to grab the mutex.
 */
void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm)
{
	struct i915_vma_resource *node;
	struct dma_fence *fence;

	do {
		fence = NULL;
		mutex_lock(&vm->mutex);
		node = vma_res_itree_iter_first(&vm->pending_unbind, 0,
						U64_MAX);
		if (node)
			fence = dma_fence_get_rcu(&node->unbind_fence);
		mutex_unlock(&vm->mutex);

		if (fence) {
			/*
			 * The wait makes sure the node eventually removes
			 * itself from the tree.
			 */
			dma_fence_wait(fence, false);
			dma_fence_put(fence);
		}
	} while (node);
}

/**
 * i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all
 * pending unbinds in a certain range of a vm.
 * @vm: The vm to look at.
 * @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds.
 * @offset: The range start.
 * @size: The range size.
 * @intr: Whether to wait interruptibly.
 * @gfp: Allocation mode for memory allocations.
 *
 * The function makes @sw_fence await all pending unbinds in a certain
 * vm range before calling the complete notifier. To be able to await
 * each individual unbind, the function needs to allocate memory using
 * the @gfp allocation mode. If that fails, the function will instead
 * wait for the unbind fence to signal, using @intr to judge whether to
 * wait interruptibly or not. Note that @gfp should ideally be selected so
 * as to avoid any expensive memory allocation stalls and rather fail and
 * synchronize itself. For now, the vm mutex is required when calling this
 * function, which means that @gfp can't call into direct reclaim. In reality
 * this means that during heavy memory pressure, we will sync in this
 * function.
 *
 * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
 */
int i915_vma_resource_bind_dep_await(struct i915_address_space *vm,
				     struct i915_sw_fence *sw_fence,
				     u64 offset,
				     u64 size,
				     bool intr,
				     gfp_t gfp)
{
	struct i915_vma_resource *node;
	u64 last = offset + size - 1;

	lockdep_assert_held(&vm->mutex);
	might_alloc(gfp);
	might_sleep();

	i915_vma_resource_color_adjust_range(vm, &offset, &last);
	node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
	while (node) {
		int ret;

		ret = i915_sw_fence_await_dma_fence(sw_fence,
						    &node->unbind_fence,
						    0, gfp);
		if (ret < 0) {
			ret = dma_fence_wait(&node->unbind_fence, intr);
			if (ret)
				return ret;
		}

		node = vma_res_itree_iter_next(node, offset, last);
	}

	return 0;
}

void i915_vma_resource_module_exit(void)
{
	kmem_cache_destroy(slab_vma_resources);
}

int __init i915_vma_resource_module_init(void)
{
	slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN);
	if (!slab_vma_resources)
		return -ENOMEM;

	return 0;
}