/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/mutex.h>

#include "i915_drv.h"
#include "i915_request.h"
#include "i915_scheduler.h"

static struct kmem_cache *slab_dependencies;
static struct kmem_cache *slab_priorities;

static DEFINE_SPINLOCK(schedule_lock);

static const struct i915_request *
node_to_request(const struct i915_sched_node *node)
{
	return container_of(node, const struct i915_request, sched);
}

static inline bool node_started(const struct i915_sched_node *node)
{
	return i915_request_started(node_to_request(node));
}

static inline bool node_signaled(const struct i915_sched_node *node)
{
	return i915_request_completed(node_to_request(node));
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

static void assert_priolists(struct i915_sched_engine * const sched_engine)
{
	struct rb_node *rb;
	long last_prio;

	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	GEM_BUG_ON(rb_first_cached(&sched_engine->queue) !=
		   rb_first(&sched_engine->queue.rb_root));

	last_prio = INT_MAX;
	for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
		const struct i915_priolist *p = to_priolist(rb);

		GEM_BUG_ON(p->priority > last_prio);
		last_prio = p->priority;
	}
}

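/*
 * i915_sched_lookup_priolist - find (or create) the request list for @prio
 *
 * Walk the RB-tree of priority levels on @sched_engine and return the list
 * of requests queued at @prio, allocating and inserting a new i915_priolist
 * if that level does not yet exist. On allocation failure the request is
 * demoted to I915_PRIORITY_NORMAL and further priority sorting is disabled
 * (no_priolist), falling back to FIFO submission.
 *
 * Caller must hold sched_engine->lock.
 */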
struct list_head *
i915_sched_lookup_priolist(struct i915_sched_engine *sched_engine, int prio)
{
	struct i915_priolist *p;
	struct rb_node **parent, *rb;
	bool first = true;

	lockdep_assert_held(&sched_engine->lock);
	assert_priolists(sched_engine);

	if (unlikely(sched_engine->no_priolist))
		prio = I915_PRIORITY_NORMAL;

find_priolist:
	/* most positive priority is scheduled first, equal priorities fifo */
	rb = NULL;
	parent = &sched_engine->queue.rb_root.rb_node;
	while (*parent) {
		rb = *parent;
		p = to_priolist(rb);
		if (prio > p->priority) {
			parent = &rb->rb_left;
		} else if (prio < p->priority) {
			parent = &rb->rb_right;
			first = false;
		} else {
			return &p->requests;
		}
	}

	if (prio == I915_PRIORITY_NORMAL) {
		p = &sched_engine->default_priolist;
	} else {
		p = kmem_cache_alloc(slab_priorities, GFP_ATOMIC);
		/* Convert an allocation failure to a priority bump */
		if (unlikely(!p)) {
			prio = I915_PRIORITY_NORMAL; /* recurses just once */

			/* To maintain ordering with all rendering, after an
			 * allocation failure we have to disable all scheduling.
			 * Requests will then be executed in fifo, and schedule
			 * will ensure that dependencies are emitted in fifo.
			 * There will be still some reordering with existing
			 * requests, so if userspace lied about their
			 * dependencies that reordering may be visible.
			 */
			sched_engine->no_priolist = true;
			goto find_priolist;
		}
	}

	p->priority = prio;
	INIT_LIST_HEAD(&p->requests);

	rb_link_node(&p->node, rb, parent);
	rb_insert_color_cached(&p->node, &sched_engine->queue, first);

	return &p->requests;
}

void __i915_priolist_free(struct i915_priolist *p)
{
	kmem_cache_free(slab_priorities, p);
}

struct sched_cache {
	struct list_head *priolist;
};

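/*
 * Acquire the sched_engine lock for the engine a request currently belongs
 * to, dropping @locked if the request has meanwhile moved (e.g. a virtual
 * engine rebinding rq->engine). Returns the sched_engine that is now locked.
 */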
static struct i915_sched_engine *
lock_sched_engine(struct i915_sched_node *node,
		  struct i915_sched_engine *locked,
		  struct sched_cache *cache)
{
	const struct i915_request *rq = node_to_request(node);
	struct i915_sched_engine *sched_engine;

	GEM_BUG_ON(!locked);

	/*
	 * Virtual engines complicate acquiring the engine timeline lock,
	 * as their rq->engine pointer is not stable until under that
	 * engine lock. The simple ploy we use is to take the lock then
	 * check that the rq still belongs to the newly locked engine.
	 */
	while (locked != (sched_engine = READ_ONCE(rq->engine)->sched_engine)) {
		spin_unlock(&locked->lock);
		memset(cache, 0, sizeof(*cache));
		spin_lock(&sched_engine->lock);
		locked = sched_engine;
	}

	GEM_BUG_ON(locked != sched_engine);
	return locked;
}

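/*
 * __i915_schedule - propagate a priority bump through the dependency graph
 *
 * Raise the effective priority of @node to at least @attr->priority and walk
 * its chain of signalers, bumping every not-yet-started dependency to the
 * same level so that the requests we wait upon are not scheduled behind us.
 * The graph walk is iterative (see the comment below) to avoid unbounded
 * recursion on the kernel stack.
 *
 * Caller must hold schedule_lock.
 */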
static void __i915_schedule(struct i915_sched_node *node,
			    const struct i915_sched_attr *attr)
{
	const int prio = max(attr->priority, node->attr.priority);
	struct i915_sched_engine *sched_engine;
	struct i915_dependency *dep, *p;
	struct i915_dependency stack;
	struct sched_cache cache;
	LIST_HEAD(dfs);

	/* Needed in order to use the temporary link inside i915_dependency */
	lockdep_assert_held(&schedule_lock);
	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);

	if (node_signaled(node))
		return;

	stack.signaler = node;
	list_add(&stack.dfs_link, &dfs);

	/*
	 * Recursively bump all dependent priorities to match the new request.
	 *
	 * A naive approach would be to use recursion:
	 * static void update_priorities(struct i915_sched_node *node, prio) {
	 *	list_for_each_entry(dep, &node->signalers_list, signal_link)
	 *		update_priorities(dep->signaler, prio)
	 *	queue_request(node);
	 * }
	 * but that may have unlimited recursion depth and so runs a very
	 * real risk of overrunning the kernel stack. Instead, we build
	 * a flat list of all dependencies starting with the current request.
	 * As we walk the list of dependencies, we add all of its dependencies
	 * to the end of the list (this may include an already visited
	 * request) and continue to walk onwards onto the new dependencies. The
	 * end result is a topological list of requests in reverse order, the
	 * last element in the list is the request we must execute first.
	 */
	list_for_each_entry(dep, &dfs, dfs_link) {
		struct i915_sched_node *node = dep->signaler;

		/* If we are already flying, we know we have no signalers */
		if (node_started(node))
			continue;

		/*
		 * Within an engine, there can be no cycle, but we may
		 * refer to the same dependency chain multiple times
		 * (redundant dependencies are not eliminated) and across
		 * engines.
		 */
		list_for_each_entry(p, &node->signalers_list, signal_link) {
			GEM_BUG_ON(p == dep); /* no cycles! */

			if (node_signaled(p->signaler))
				continue;

			if (prio > READ_ONCE(p->signaler->attr.priority))
				list_move_tail(&p->dfs_link, &dfs);
		}
	}

	/*
	 * If we didn't need to bump any existing priorities, and we haven't
	 * yet submitted this request (i.e. there is no potential race with
	 * execlists_submit_request()), we can set our own priority and skip
	 * acquiring the engine locks.
	 */
	if (node->attr.priority == I915_PRIORITY_INVALID) {
		GEM_BUG_ON(!list_empty(&node->link));
		node->attr = *attr;

		if (stack.dfs_link.next == stack.dfs_link.prev)
			return;

		__list_del_entry(&stack.dfs_link);
	}

	memset(&cache, 0, sizeof(cache));
	sched_engine = node_to_request(node)->engine->sched_engine;
	spin_lock(&sched_engine->lock);

	/* Fifo and depth-first replacement ensure our deps execute before us */
	sched_engine = lock_sched_engine(node, sched_engine, &cache);
	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
		struct i915_request *from = container_of(dep->signaler,
							 struct i915_request,
							 sched);
		INIT_LIST_HEAD(&dep->dfs_link);

		node = dep->signaler;
		sched_engine = lock_sched_engine(node, sched_engine, &cache);
		lockdep_assert_held(&sched_engine->lock);

		/* Recheck after acquiring the engine->timeline.lock */
		if (prio <= node->attr.priority || node_signaled(node))
			continue;

		GEM_BUG_ON(node_to_request(node)->engine->sched_engine !=
			   sched_engine);

		/* Must be called before changing the node's priority */
		if (sched_engine->bump_inflight_request_prio)
			sched_engine->bump_inflight_request_prio(from, prio);

		WRITE_ONCE(node->attr.priority, prio);

		/*
		 * Once the request is ready, it will be placed into the
		 * priority lists and then onto the HW runlist. Before the
		 * request is ready, it does not contribute to our preemption
		 * decisions and we can safely ignore it, as it will, and
		 * any preemption required, be dealt with upon submission.
		 * See engine->submit_request()
		 */
		if (list_empty(&node->link))
			continue;

		if (i915_request_in_priority_queue(node_to_request(node))) {
			if (!cache.priolist)
				cache.priolist =
					i915_sched_lookup_priolist(sched_engine,
								   prio);
			list_move_tail(&node->link, cache.priolist);
		}

		/* Defer (tasklet) submission until after all of our updates. */
		if (sched_engine->kick_backend)
			sched_engine->kick_backend(node_to_request(node), prio);
	}

	spin_unlock(&sched_engine->lock);
}

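/*
 * i915_schedule - apply new scheduling attributes to a request
 *
 * Takes the global schedule_lock and delegates to __i915_schedule() to
 * propagate the (potentially raised) priority through the request's
 * dependency graph.
 */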
void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
{
	spin_lock_irq(&schedule_lock);
	__i915_schedule(&rq->sched, attr);
	spin_unlock_irq(&schedule_lock);
}

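/*
 * One-time initialisation of the scheduling node embedded in each request;
 * reinit below returns a recycled node to its pristine state and asserts
 * that all of its dependency lists have already been emptied.
 */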
void i915_sched_node_init(struct i915_sched_node *node)
{
	INIT_LIST_HEAD(&node->signalers_list);
	INIT_LIST_HEAD(&node->waiters_list);
	INIT_LIST_HEAD(&node->link);

	i915_sched_node_reinit(node);
}

void i915_sched_node_reinit(struct i915_sched_node *node)
{
	node->attr.priority = I915_PRIORITY_INVALID;
	node->semaphores = 0;
	node->flags = 0;

	GEM_BUG_ON(!list_empty(&node->signalers_list));
	GEM_BUG_ON(!list_empty(&node->waiters_list));
	GEM_BUG_ON(!list_empty(&node->link));
}

static struct i915_dependency *
i915_dependency_alloc(void)
{
	return kmem_cache_alloc(slab_dependencies, GFP_KERNEL);
}

static void
i915_dependency_free(struct i915_dependency *dep)
{
	kmem_cache_free(slab_dependencies, dep);
}

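/*
 * Record that @node waits upon @signal, linking the caller-provided @dep
 * onto both nodes' dependency lists. Returns true if the edge was added,
 * false if @signal has already completed and no dependency is required (the
 * caller then frees @dep if it owns the allocation).
 */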
bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
				      struct i915_sched_node *signal,
				      struct i915_dependency *dep,
				      unsigned long flags)
{
	bool ret = false;

	spin_lock_irq(&schedule_lock);

	if (!node_signaled(signal)) {
		INIT_LIST_HEAD(&dep->dfs_link);
		dep->signaler = signal;
		dep->waiter = node;
		dep->flags = flags;

		/* All set, now publish. Beware the lockless walkers. */
		list_add_rcu(&dep->signal_link, &node->signalers_list);
		list_add_rcu(&dep->wait_link, &signal->waiters_list);

		/* Propagate the chains */
		node->flags |= signal->flags;
		ret = true;
	}

	spin_unlock_irq(&schedule_lock);

	return ret;
}

int i915_sched_node_add_dependency(struct i915_sched_node *node,
				   struct i915_sched_node *signal,
				   unsigned long flags)
{
	struct i915_dependency *dep;

	dep = i915_dependency_alloc();
	if (!dep)
		return -ENOMEM;

	if (!__i915_sched_node_add_dependency(node, signal, dep,
					      flags | I915_DEPENDENCY_ALLOC))
		i915_dependency_free(dep);

	return 0;
}

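/*
 * Tear down all remaining dependency edges for @node, in both directions,
 * freeing any i915_dependency allocated on its behalf. Retirement normally
 * unlinks these for us, but timelines retire independently so this may run
 * out of order (see the comments below).
 */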
void i915_sched_node_fini(struct i915_sched_node *node)
{
	struct i915_dependency *dep, *tmp;

	spin_lock_irq(&schedule_lock);

	/*
	 * Everyone we depended upon (the fences we wait to be signaled)
	 * should retire before us and remove themselves from our list.
	 * However, retirement is run independently on each timeline and
	 * so we may be called out-of-order.
	 */
	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

		list_del_rcu(&dep->wait_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(dep);
	}
	INIT_LIST_HEAD(&node->signalers_list);

	/* Remove ourselves from everyone who depends upon us */
	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
		GEM_BUG_ON(dep->signaler != node);
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

		list_del_rcu(&dep->signal_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(dep);
	}
	INIT_LIST_HEAD(&node->waiters_list);

	spin_unlock_irq(&schedule_lock);
}

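/*
 * Debug printing helper: show @rq and, if it is still in flight, each of its
 * incomplete signalers on other timelines, indented beneath it.
 */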
void i915_request_show_with_schedule(struct drm_printer *m,
				     const struct i915_request *rq,
				     const char *prefix,
				     int indent)
{
	struct i915_dependency *dep;

	i915_request_show(m, rq, prefix, indent);
	if (i915_request_completed(rq))
		return;

	rcu_read_lock();
	for_each_signaler(dep, rq) {
		const struct i915_request *signaler =
			node_to_request(dep->signaler);

		/* Dependencies along the same timeline are expected. */
		if (signaler->timeline == rq->timeline)
			continue;

		if (__i915_request_is_complete(signaler))
			continue;

		i915_request_show(m, signaler, prefix, indent + 2);
	}
	rcu_read_unlock();
}

static void default_destroy(struct kref *kref)
{
	struct i915_sched_engine *sched_engine =
		container_of(kref, typeof(*sched_engine), ref);

	tasklet_kill(&sched_engine->tasklet); /* flush the callback */
	kfree(sched_engine);
}

static bool default_disabled(struct i915_sched_engine *sched_engine)
{
	return false;
}

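/*
 * Allocate and initialise a refcounted i915_sched_engine with an empty
 * priority queue and the default destroy/disabled callbacks. @subclass is
 * only used to give the engine lock its own lockdep class.
 */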
struct i915_sched_engine *
i915_sched_engine_create(unsigned int subclass)
{
	struct i915_sched_engine *sched_engine;

	sched_engine = kzalloc(sizeof(*sched_engine), GFP_KERNEL);
	if (!sched_engine)
		return NULL;

	kref_init(&sched_engine->ref);

	sched_engine->queue = RB_ROOT_CACHED;
	sched_engine->queue_priority_hint = INT_MIN;
	sched_engine->destroy = default_destroy;
	sched_engine->disabled = default_disabled;

	INIT_LIST_HEAD(&sched_engine->requests);
	INIT_LIST_HEAD(&sched_engine->hold);

	spin_lock_init(&sched_engine->lock);
	lockdep_set_subclass(&sched_engine->lock, subclass);

	/*
	 * Due to an interesting quirk in lockdep's internal debug tracking,
	 * after setting a subclass we must ensure the lock is used. Otherwise,
	 * nr_unused_locks is incremented once too often.
	 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	local_irq_disable();
	lock_map_acquire(&sched_engine->lock.dep_map);
	lock_map_release(&sched_engine->lock.dep_map);
	local_irq_enable();
#endif

	return sched_engine;
}

void i915_scheduler_module_exit(void)
{
	kmem_cache_destroy(slab_dependencies);
	kmem_cache_destroy(slab_priorities);
}

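/*
 * Create the slab caches backing i915_dependency and i915_priolist
 * allocations. The dependency cache is SLAB_TYPESAFE_BY_RCU as dependency
 * links are published with list_add_rcu() and walked by lockless readers.
 */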
int __init i915_scheduler_module_init(void)
{
	slab_dependencies = KMEM_CACHE(i915_dependency,
				       SLAB_HWCACHE_ALIGN |
				       SLAB_TYPESAFE_BY_RCU);
	if (!slab_dependencies)
		return -ENOMEM;

	slab_priorities = KMEM_CACHE(i915_priolist, 0);
	if (!slab_priorities)
		goto err_priorities;

	return 0;

err_priorities:
	kmem_cache_destroy(slab_dependencies);
	return -ENOMEM;
}