1 | /* |
2 | * Copyright 2015 Advanced Micro Devices, Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | * |
22 | */ |
23 | |
24 | /** |
25 | * DOC: Overview |
26 | * |
 * The GPU scheduler provides entities which allow userspace to push jobs
 * into software queues which are then scheduled on a hardware run queue.
 * The software queues have a priority among them. The scheduler selects the
 * entities from the run queue using a FIFO. The scheduler provides dependency
 * handling features among jobs. The driver is supposed to provide callback
 * functions for backend operations to the scheduler, like submitting a job to
 * the hardware run queue, returning the dependencies of a job, etc.
34 | * |
35 | * The organisation of the scheduler is the following: |
36 | * |
37 | * 1. Each hw run queue has one scheduler |
38 | * 2. Each scheduler has multiple run queues with different priorities |
 *    (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
40 | * 3. Each scheduler run queue has a queue of entities to schedule |
41 | * 4. Entities themselves maintain a queue of jobs that will be scheduled on |
42 | * the hardware. |
43 | * |
 * The jobs in an entity are always scheduled in the order in which they were pushed.
45 | * |
 * Note that once a job has been taken from the entity's queue and pushed to
 * the hardware, i.e. the pending queue, the entity must not be referenced
 * anymore through the job's entity pointer.
49 | */ |
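
/*
 * As a rough illustration of the driver-side contract described above, a
 * driver typically provides a &struct drm_sched_backend_ops instance along
 * the following lines (the my_*() callbacks are hypothetical driver
 * functions; only the ops members are part of the scheduler API):
 *
 *	static const struct drm_sched_backend_ops my_sched_ops = {
 *		.run_job	= my_run_job,
 *		.timedout_job	= my_timedout_job,
 *		.free_job	= my_free_job,
 *	};
 */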
50 | |
51 | /** |
52 | * DOC: Flow Control |
53 | * |
 * The DRM GPU scheduler provides a flow control mechanism to regulate the rate
 * at which the jobs fetched from scheduler entities are executed.
56 | * |
57 | * In this context the &drm_gpu_scheduler keeps track of a driver specified |
58 | * credit limit representing the capacity of this scheduler and a credit count; |
59 | * every &drm_sched_job carries a driver specified number of credits. |
60 | * |
61 | * Once a job is executed (but not yet finished), the job's credits contribute |
62 | * to the scheduler's credit count until the job is finished. If by executing |
63 | * one more job the scheduler's credit count would exceed the scheduler's |
64 | * credit limit, the job won't be executed. Instead, the scheduler will wait |
65 | * until the credit count has decreased enough to not overflow its credit limit. |
66 | * This implies waiting for previously executed jobs. |
67 | * |
68 | * Optionally, drivers may register a callback (update_job_credits) provided by |
69 | * struct drm_sched_backend_ops to update the job's credits dynamically. The |
 * scheduler executes this callback every time it considers a job for execution
 * and subsequently checks whether the job fits within the scheduler's credit
 * limit.
73 | */ |
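
/*
 * As a sketch of how a driver might use this: a driver whose limiting
 * resource is ring buffer space could pass the ring size as the credit limit
 * to drm_sched_init(), pass the number of ring slots a job consumes as the
 * job's credits to drm_sched_job_init(), and optionally refine that estimate
 * later through the update_job_credits callback (my_job_ring_slots() is a
 * hypothetical driver helper):
 *
 *	static u32 my_update_job_credits(struct drm_sched_job *sched_job)
 *	{
 *		return my_job_ring_slots(sched_job);
 *	}
 */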
74 | |
75 | #include <linux/wait.h> |
76 | #include <linux/sched.h> |
77 | #include <linux/completion.h> |
78 | #include <linux/dma-resv.h> |
79 | #include <uapi/linux/sched/types.h> |
80 | |
81 | #include <drm/drm_print.h> |
82 | #include <drm/drm_gem.h> |
83 | #include <drm/drm_syncobj.h> |
84 | #include <drm/gpu_scheduler.h> |
85 | #include <drm/spsc_queue.h> |
86 | |
87 | #define CREATE_TRACE_POINTS |
88 | #include "gpu_scheduler_trace.h" |
89 | |
90 | #define to_drm_sched_job(sched_job) \ |
91 | container_of((sched_job), struct drm_sched_job, queue_node) |
92 | |
93 | int drm_sched_policy = DRM_SCHED_POLICY_FIFO; |
94 | |
95 | /** |
96 | * DOC: sched_policy (int) |
 * Used to override the default scheduling policy for entities in a run queue.
98 | */ |
MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default).");
100 | module_param_named(sched_policy, drm_sched_policy, int, 0444); |
101 | |
102 | static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched) |
103 | { |
104 | u32 credits; |
105 | |
106 | drm_WARN_ON(sched, check_sub_overflow(sched->credit_limit, |
107 | atomic_read(&sched->credit_count), |
108 | &credits)); |
109 | |
110 | return credits; |
111 | } |
112 | |
113 | /** |
114 | * drm_sched_can_queue -- Can we queue more to the hardware? |
115 | * @sched: scheduler instance |
116 | * @entity: the scheduler entity |
117 | * |
118 | * Return true if we can push at least one more job from @entity, false |
119 | * otherwise. |
120 | */ |
121 | static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched, |
122 | struct drm_sched_entity *entity) |
123 | { |
124 | struct drm_sched_job *s_job; |
125 | |
126 | s_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); |
127 | if (!s_job) |
128 | return false; |
129 | |
130 | if (sched->ops->update_job_credits) { |
131 | s_job->credits = sched->ops->update_job_credits(s_job); |
132 | |
133 | drm_WARN(sched, !s_job->credits, |
			 "Jobs with zero credits bypass job-flow control.\n");
135 | } |
136 | |
137 | /* If a job exceeds the credit limit, truncate it to the credit limit |
138 | * itself to guarantee forward progress. |
139 | */ |
140 | if (drm_WARN(sched, s_job->credits > sched->credit_limit, |
		     "Jobs may not exceed the credit limit, truncate.\n"))
142 | s_job->credits = sched->credit_limit; |
143 | |
144 | return drm_sched_available_credits(sched) >= s_job->credits; |
145 | } |
146 | |
147 | static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a, |
148 | const struct rb_node *b) |
149 | { |
150 | struct drm_sched_entity *ent_a = rb_entry((a), struct drm_sched_entity, rb_tree_node); |
151 | struct drm_sched_entity *ent_b = rb_entry((b), struct drm_sched_entity, rb_tree_node); |
152 | |
	return ktime_before(ent_a->oldest_job_waiting, ent_b->oldest_job_waiting);
154 | } |
155 | |
156 | static inline void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity) |
157 | { |
158 | struct drm_sched_rq *rq = entity->rq; |
159 | |
160 | if (!RB_EMPTY_NODE(&entity->rb_tree_node)) { |
		rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root);
162 | RB_CLEAR_NODE(&entity->rb_tree_node); |
163 | } |
164 | } |
165 | |
166 | void drm_sched_rq_update_fifo(struct drm_sched_entity *entity, ktime_t ts) |
167 | { |
168 | /* |
169 | * Both locks need to be grabbed, one to protect from entity->rq change |
170 | * for entity from within concurrent drm_sched_entity_select_rq and the |
171 | * other to update the rb tree structure. |
172 | */ |
	spin_lock(&entity->rq_lock);
	spin_lock(&entity->rq->lock);
175 | |
176 | drm_sched_rq_remove_fifo_locked(entity); |
177 | |
178 | entity->oldest_job_waiting = ts; |
179 | |
	rb_add_cached(&entity->rb_tree_node, &entity->rq->rb_tree_root,
		      drm_sched_entity_compare_before);
182 | |
	spin_unlock(&entity->rq->lock);
	spin_unlock(&entity->rq_lock);
185 | } |
186 | |
187 | /** |
188 | * drm_sched_rq_init - initialize a given run queue struct |
189 | * |
190 | * @sched: scheduler instance to associate with this run queue |
191 | * @rq: scheduler run queue |
192 | * |
193 | * Initializes a scheduler runqueue. |
194 | */ |
195 | static void drm_sched_rq_init(struct drm_gpu_scheduler *sched, |
196 | struct drm_sched_rq *rq) |
197 | { |
198 | spin_lock_init(&rq->lock); |
	INIT_LIST_HEAD(&rq->entities);
200 | rq->rb_tree_root = RB_ROOT_CACHED; |
201 | rq->current_entity = NULL; |
202 | rq->sched = sched; |
203 | } |
204 | |
205 | /** |
206 | * drm_sched_rq_add_entity - add an entity |
207 | * |
208 | * @rq: scheduler run queue |
209 | * @entity: scheduler entity |
210 | * |
211 | * Adds a scheduler entity to the run queue. |
212 | */ |
213 | void drm_sched_rq_add_entity(struct drm_sched_rq *rq, |
214 | struct drm_sched_entity *entity) |
215 | { |
	if (!list_empty(&entity->list))
217 | return; |
218 | |
	spin_lock(&rq->lock);

	atomic_inc(rq->sched->score);
	list_add_tail(&entity->list, &rq->entities);

	spin_unlock(&rq->lock);
225 | } |
226 | |
227 | /** |
228 | * drm_sched_rq_remove_entity - remove an entity |
229 | * |
230 | * @rq: scheduler run queue |
231 | * @entity: scheduler entity |
232 | * |
233 | * Removes a scheduler entity from the run queue. |
234 | */ |
235 | void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, |
236 | struct drm_sched_entity *entity) |
237 | { |
	if (list_empty(&entity->list))
239 | return; |
240 | |
	spin_lock(&rq->lock);

	atomic_dec(rq->sched->score);
	list_del_init(&entity->list);
245 | |
246 | if (rq->current_entity == entity) |
247 | rq->current_entity = NULL; |
248 | |
249 | if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) |
250 | drm_sched_rq_remove_fifo_locked(entity); |
251 | |
	spin_unlock(&rq->lock);
253 | } |
254 | |
255 | /** |
256 | * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run |
257 | * |
258 | * @sched: the gpu scheduler |
259 | * @rq: scheduler run queue to check. |
260 | * |
261 | * Try to find the next ready entity. |
262 | * |
263 | * Return an entity if one is found; return an error-pointer (!NULL) if an |
264 | * entity was ready, but the scheduler had insufficient credits to accommodate |
265 | * its job; return NULL, if no ready entity was found. |
266 | */ |
267 | static struct drm_sched_entity * |
268 | drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched, |
269 | struct drm_sched_rq *rq) |
270 | { |
271 | struct drm_sched_entity *entity; |
272 | |
	spin_lock(&rq->lock);
274 | |
275 | entity = rq->current_entity; |
276 | if (entity) { |
277 | list_for_each_entry_continue(entity, &rq->entities, list) { |
278 | if (drm_sched_entity_is_ready(entity)) { |
279 | /* If we can't queue yet, preserve the current |
280 | * entity in terms of fairness. |
281 | */ |
282 | if (!drm_sched_can_queue(sched, entity)) { |
					spin_unlock(&rq->lock);
					return ERR_PTR(-ENOSPC);
				}

				rq->current_entity = entity;
				reinit_completion(&entity->entity_idle);
				spin_unlock(&rq->lock);
290 | return entity; |
291 | } |
292 | } |
293 | } |
294 | |
295 | list_for_each_entry(entity, &rq->entities, list) { |
296 | if (drm_sched_entity_is_ready(entity)) { |
297 | /* If we can't queue yet, preserve the current entity in |
298 | * terms of fairness. |
299 | */ |
300 | if (!drm_sched_can_queue(sched, entity)) { |
				spin_unlock(&rq->lock);
				return ERR_PTR(-ENOSPC);
			}

			rq->current_entity = entity;
			reinit_completion(&entity->entity_idle);
			spin_unlock(&rq->lock);
308 | return entity; |
309 | } |
310 | |
311 | if (entity == rq->current_entity) |
312 | break; |
313 | } |
314 | |
	spin_unlock(&rq->lock);
316 | |
317 | return NULL; |
318 | } |
319 | |
320 | /** |
321 | * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run |
322 | * |
323 | * @sched: the gpu scheduler |
324 | * @rq: scheduler run queue to check. |
325 | * |
326 | * Find oldest waiting ready entity. |
327 | * |
328 | * Return an entity if one is found; return an error-pointer (!NULL) if an |
329 | * entity was ready, but the scheduler had insufficient credits to accommodate |
330 | * its job; return NULL, if no ready entity was found. |
331 | */ |
332 | static struct drm_sched_entity * |
333 | drm_sched_rq_select_entity_fifo(struct drm_gpu_scheduler *sched, |
334 | struct drm_sched_rq *rq) |
335 | { |
336 | struct rb_node *rb; |
337 | |
	spin_lock(&rq->lock);
339 | for (rb = rb_first_cached(&rq->rb_tree_root); rb; rb = rb_next(rb)) { |
340 | struct drm_sched_entity *entity; |
341 | |
342 | entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node); |
343 | if (drm_sched_entity_is_ready(entity)) { |
344 | /* If we can't queue yet, preserve the current entity in |
345 | * terms of fairness. |
346 | */ |
347 | if (!drm_sched_can_queue(sched, entity)) { |
				spin_unlock(&rq->lock);
				return ERR_PTR(-ENOSPC);
			}

			rq->current_entity = entity;
			reinit_completion(&entity->entity_idle);
354 | break; |
355 | } |
356 | } |
	spin_unlock(&rq->lock);
358 | |
359 | return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL; |
360 | } |
361 | |
362 | /** |
363 | * drm_sched_run_job_queue - enqueue run-job work |
364 | * @sched: scheduler instance |
365 | */ |
366 | static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched) |
367 | { |
368 | if (!READ_ONCE(sched->pause_submit)) |
		queue_work(sched->submit_wq, &sched->work_run_job);
370 | } |
371 | |
372 | /** |
373 | * __drm_sched_run_free_queue - enqueue free-job work |
374 | * @sched: scheduler instance |
375 | */ |
376 | static void __drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) |
377 | { |
378 | if (!READ_ONCE(sched->pause_submit)) |
		queue_work(sched->submit_wq, &sched->work_free_job);
380 | } |
381 | |
382 | /** |
383 | * drm_sched_run_free_queue - enqueue free-job work if ready |
384 | * @sched: scheduler instance |
385 | */ |
386 | static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) |
387 | { |
388 | struct drm_sched_job *job; |
389 | |
	spin_lock(&sched->job_list_lock);
	job = list_first_entry_or_null(&sched->pending_list,
				       struct drm_sched_job, list);
	if (job && dma_fence_is_signaled(&job->s_fence->finished))
		__drm_sched_run_free_queue(sched);
	spin_unlock(&sched->job_list_lock);
396 | } |
397 | |
398 | /** |
399 | * drm_sched_job_done - complete a job |
 * @s_job: pointer to the job which is done
 * @result: the job's completion status
401 | * |
402 | * Finish the job's fence and wake up the worker thread. |
403 | */ |
404 | static void drm_sched_job_done(struct drm_sched_job *s_job, int result) |
405 | { |
406 | struct drm_sched_fence *s_fence = s_job->s_fence; |
407 | struct drm_gpu_scheduler *sched = s_fence->sched; |
408 | |
	atomic_sub(s_job->credits, &sched->credit_count);
	atomic_dec(sched->score);

	trace_drm_sched_process_job(s_fence);

	dma_fence_get(&s_fence->finished);
	drm_sched_fence_finished(s_fence, result);
	dma_fence_put(&s_fence->finished);
417 | __drm_sched_run_free_queue(sched); |
418 | } |
419 | |
420 | /** |
421 | * drm_sched_job_done_cb - the callback for a done job |
422 | * @f: fence |
423 | * @cb: fence callbacks |
424 | */ |
425 | static void drm_sched_job_done_cb(struct dma_fence *f, struct dma_fence_cb *cb) |
426 | { |
427 | struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb); |
428 | |
	drm_sched_job_done(s_job, f->error);
430 | } |
431 | |
432 | /** |
433 | * drm_sched_start_timeout - start timeout for reset worker |
434 | * |
435 | * @sched: scheduler instance to start the worker for |
436 | * |
437 | * Start the timeout for the given scheduler. |
438 | */ |
439 | static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched) |
440 | { |
441 | lockdep_assert_held(&sched->job_list_lock); |
442 | |
443 | if (sched->timeout != MAX_SCHEDULE_TIMEOUT && |
	    !list_empty(&sched->pending_list))
		mod_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout);
446 | } |
447 | |
448 | static void drm_sched_start_timeout_unlocked(struct drm_gpu_scheduler *sched) |
449 | { |
	spin_lock(&sched->job_list_lock);
	drm_sched_start_timeout(sched);
	spin_unlock(&sched->job_list_lock);
453 | } |
454 | |
455 | /** |
 * drm_sched_tdr_queue_imm - immediately start job timeout handler
457 | * |
458 | * @sched: scheduler for which the timeout handling should be started. |
459 | * |
460 | * Start timeout handling immediately for the named scheduler. |
461 | */ |
462 | void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched) |
463 | { |
	spin_lock(&sched->job_list_lock);
465 | sched->timeout = 0; |
466 | drm_sched_start_timeout(sched); |
	spin_unlock(&sched->job_list_lock);
468 | } |
469 | EXPORT_SYMBOL(drm_sched_tdr_queue_imm); |
470 | |
471 | /** |
472 | * drm_sched_fault - immediately start timeout handler |
473 | * |
474 | * @sched: scheduler where the timeout handling should be started. |
475 | * |
476 | * Start timeout handling immediately when the driver detects a hardware fault. |
477 | */ |
478 | void drm_sched_fault(struct drm_gpu_scheduler *sched) |
479 | { |
480 | if (sched->timeout_wq) |
		mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0);
482 | } |
483 | EXPORT_SYMBOL(drm_sched_fault); |
484 | |
485 | /** |
486 | * drm_sched_suspend_timeout - Suspend scheduler job timeout |
487 | * |
488 | * @sched: scheduler instance for which to suspend the timeout |
489 | * |
490 | * Suspend the delayed work timeout for the scheduler. This is done by |
491 | * modifying the delayed work timeout to an arbitrary large value, |
492 | * MAX_SCHEDULE_TIMEOUT in this case. |
493 | * |
494 | * Returns the timeout remaining |
495 | * |
496 | */ |
497 | unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched) |
498 | { |
499 | unsigned long sched_timeout, now = jiffies; |
500 | |
501 | sched_timeout = sched->work_tdr.timer.expires; |
502 | |
503 | /* |
504 | * Modify the timeout to an arbitrarily large value. This also prevents |
505 | * the timeout to be restarted when new submissions arrive |
506 | */ |
	if (mod_delayed_work(sched->timeout_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
508 | && time_after(sched_timeout, now)) |
509 | return sched_timeout - now; |
510 | else |
511 | return sched->timeout; |
512 | } |
513 | EXPORT_SYMBOL(drm_sched_suspend_timeout); |
514 | |
515 | /** |
516 | * drm_sched_resume_timeout - Resume scheduler job timeout |
517 | * |
518 | * @sched: scheduler instance for which to resume the timeout |
519 | * @remaining: remaining timeout |
520 | * |
521 | * Resume the delayed work timeout for the scheduler. |
522 | */ |
523 | void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched, |
524 | unsigned long remaining) |
525 | { |
	spin_lock(&sched->job_list_lock);

	if (list_empty(&sched->pending_list))
		cancel_delayed_work(&sched->work_tdr);
	else
		mod_delayed_work(sched->timeout_wq, &sched->work_tdr, remaining);

	spin_unlock(&sched->job_list_lock);
534 | } |
535 | EXPORT_SYMBOL(drm_sched_resume_timeout); |
536 | |
537 | static void drm_sched_job_begin(struct drm_sched_job *s_job) |
538 | { |
539 | struct drm_gpu_scheduler *sched = s_job->sched; |
540 | |
	spin_lock(&sched->job_list_lock);
	list_add_tail(&s_job->list, &sched->pending_list);
	drm_sched_start_timeout(sched);
	spin_unlock(&sched->job_list_lock);
545 | } |
546 | |
547 | static void drm_sched_job_timedout(struct work_struct *work) |
548 | { |
549 | struct drm_gpu_scheduler *sched; |
550 | struct drm_sched_job *job; |
551 | enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL; |
552 | |
553 | sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); |
554 | |
555 | /* Protects against concurrent deletion in drm_sched_get_finished_job */ |
	spin_lock(&sched->job_list_lock);
557 | job = list_first_entry_or_null(&sched->pending_list, |
558 | struct drm_sched_job, list); |
559 | |
560 | if (job) { |
		/*
		 * Remove the bad job so it cannot be freed by a concurrent
		 * drm_sched_get_finished_job(). It will be reinserted after
		 * the scheduler's work items have been stopped, at which
		 * point it's safe.
		 */
		list_del_init(&job->list);
		spin_unlock(&sched->job_list_lock);
568 | |
569 | status = job->sched->ops->timedout_job(job); |
570 | |
571 | /* |
572 | * Guilty job did complete and hence needs to be manually removed |
573 | * See drm_sched_stop doc. |
574 | */ |
575 | if (sched->free_guilty) { |
576 | job->sched->ops->free_job(job); |
577 | sched->free_guilty = false; |
578 | } |
579 | } else { |
		spin_unlock(&sched->job_list_lock);
581 | } |
582 | |
583 | if (status != DRM_GPU_SCHED_STAT_ENODEV) |
584 | drm_sched_start_timeout_unlocked(sched); |
585 | } |
586 | |
587 | /** |
588 | * drm_sched_stop - stop the scheduler |
589 | * |
590 | * @sched: scheduler instance |
591 | * @bad: job which caused the time out |
592 | * |
 * Stop the scheduler, and also remove and free all completed jobs.
 * Note: the bad job will not be freed, as it might be used later, so it is the
 * caller's responsibility to release it manually if it is no longer part of
 * the pending list.
597 | * |
598 | */ |
599 | void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) |
600 | { |
601 | struct drm_sched_job *s_job, *tmp; |
602 | |
603 | drm_sched_wqueue_stop(sched); |
604 | |
605 | /* |
606 | * Reinsert back the bad job here - now it's safe as |
607 | * drm_sched_get_finished_job cannot race against us and release the |
608 | * bad job at this point - we parked (waited for) any in progress |
609 | * (earlier) cleanups and drm_sched_get_finished_job will not be called |
610 | * now until the scheduler thread is unparked. |
611 | */ |
612 | if (bad && bad->sched == sched) |
613 | /* |
614 | * Add at the head of the queue to reflect it was the earliest |
615 | * job extracted. |
616 | */ |
		list_add(&bad->list, &sched->pending_list);
618 | |
	/*
	 * Iterate the job list from the last to the earliest one and either
	 * deactivate their HW callbacks or remove them from the pending list
	 * if they have already signaled.
	 * This iteration is thread safe as the scheduler's work items are stopped.
	 */
625 | list_for_each_entry_safe_reverse(s_job, tmp, &sched->pending_list, |
626 | list) { |
		if (s_job->s_fence->parent &&
		    dma_fence_remove_callback(s_job->s_fence->parent,
					      &s_job->cb)) {
			dma_fence_put(s_job->s_fence->parent);
			s_job->s_fence->parent = NULL;
			atomic_sub(s_job->credits, &sched->credit_count);
633 | } else { |
634 | /* |
635 | * remove job from pending_list. |
636 | * Locking here is for concurrent resume timeout |
637 | */ |
			spin_lock(&sched->job_list_lock);
			list_del_init(&s_job->list);
			spin_unlock(&sched->job_list_lock);
641 | |
642 | /* |
643 | * Wait for job's HW fence callback to finish using s_job |
644 | * before releasing it. |
645 | * |
646 | * Job is still alive so fence refcount at least 1 |
647 | */ |
			dma_fence_wait(&s_job->s_fence->finished, false);
649 | |
650 | /* |
651 | * We must keep bad job alive for later use during |
652 | * recovery by some of the drivers but leave a hint |
653 | * that the guilty job must be released. |
654 | */ |
655 | if (bad != s_job) |
656 | sched->ops->free_job(s_job); |
657 | else |
658 | sched->free_guilty = true; |
659 | } |
660 | } |
661 | |
662 | /* |
663 | * Stop pending timer in flight as we rearm it in drm_sched_start. This |
664 | * avoids the pending timeout work in progress to fire right away after |
665 | * this TDR finished and before the newly restarted jobs had a |
666 | * chance to complete. |
667 | */ |
	cancel_delayed_work(&sched->work_tdr);
669 | } |
670 | |
671 | EXPORT_SYMBOL(drm_sched_stop); |
672 | |
673 | /** |
674 | * drm_sched_start - recover jobs after a reset |
675 | * |
676 | * @sched: scheduler instance |
677 | * @full_recovery: proceed with complete sched restart |
678 | * |
679 | */ |
680 | void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery) |
681 | { |
682 | struct drm_sched_job *s_job, *tmp; |
683 | int r; |
684 | |
685 | /* |
686 | * Locking the list is not required here as the sched thread is parked |
687 | * so no new jobs are being inserted or removed. Also concurrent |
688 | * GPU recovers can't run in parallel. |
689 | */ |
690 | list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) { |
691 | struct dma_fence *fence = s_job->s_fence->parent; |
692 | |
		atomic_add(s_job->credits, &sched->credit_count);
694 | |
695 | if (!full_recovery) |
696 | continue; |
697 | |
		if (fence) {
			r = dma_fence_add_callback(fence, &s_job->cb,
						   drm_sched_job_done_cb);
			if (r == -ENOENT)
				drm_sched_job_done(s_job, fence->error);
			else if (r)
				DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
					      r);
		} else
			drm_sched_job_done(s_job, -ECANCELED);
708 | } |
709 | |
710 | if (full_recovery) |
711 | drm_sched_start_timeout_unlocked(sched); |
712 | |
713 | drm_sched_wqueue_start(sched); |
714 | } |
715 | EXPORT_SYMBOL(drm_sched_start); |
716 | |
717 | /** |
718 | * drm_sched_resubmit_jobs - Deprecated, don't use in new code! |
719 | * |
720 | * @sched: scheduler instance |
721 | * |
 * Re-submitting jobs was a concept AMD came up with as a cheap way to implement
 * recovery after a job timeout.
 *
 * This turned out to not work very well. First of all there are many
 * problems with the dma_fence implementation and requirements. Either the
727 | * implementation is risking deadlocks with core memory management or violating |
728 | * documented implementation details of the dma_fence object. |
729 | * |
730 | * Drivers can still save and restore their state for recovery operations, but |
731 | * we shouldn't make this a general scheduler feature around the dma_fence |
732 | * interface. |
733 | */ |
734 | void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched) |
735 | { |
736 | struct drm_sched_job *s_job, *tmp; |
737 | uint64_t guilty_context; |
738 | bool found_guilty = false; |
739 | struct dma_fence *fence; |
740 | |
741 | list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) { |
742 | struct drm_sched_fence *s_fence = s_job->s_fence; |
743 | |
		if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
745 | found_guilty = true; |
746 | guilty_context = s_job->s_fence->scheduled.context; |
747 | } |
748 | |
749 | if (found_guilty && s_job->s_fence->scheduled.context == guilty_context) |
			dma_fence_set_error(&s_fence->finished, -ECANCELED);
751 | |
752 | fence = sched->ops->run_job(s_job); |
753 | |
		if (IS_ERR_OR_NULL(fence)) {
			if (IS_ERR(fence))
				dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
757 | |
758 | s_job->s_fence->parent = NULL; |
759 | } else { |
760 | |
761 | s_job->s_fence->parent = dma_fence_get(fence); |
762 | |
			/* Drop for original kref_init */
764 | dma_fence_put(fence); |
765 | } |
766 | } |
767 | } |
768 | EXPORT_SYMBOL(drm_sched_resubmit_jobs); |
769 | |
770 | /** |
771 | * drm_sched_job_init - init a scheduler job |
772 | * @job: scheduler job to init |
773 | * @entity: scheduler entity to use |
774 | * @credits: the number of credits this job contributes to the schedulers |
775 | * credit limit |
776 | * @owner: job owner for debugging |
777 | * |
778 | * Refer to drm_sched_entity_push_job() documentation |
779 | * for locking considerations. |
780 | * |
 * Drivers must make sure to call drm_sched_job_cleanup() if this function
 * returns successfully, even when @job is aborted before drm_sched_job_arm()
 * is called.
 *
 * WARNING: amdgpu abuses &drm_sched.ready to signal when the hardware
 * has died, which can mean that there's no valid runqueue for an @entity.
 * This function returns -ENOENT in this case (which probably should be -EIO as
 * a more meaningful return value).
788 | * |
789 | * Returns 0 for success, negative error code otherwise. |
790 | */ |
791 | int drm_sched_job_init(struct drm_sched_job *job, |
792 | struct drm_sched_entity *entity, |
793 | u32 credits, void *owner) |
794 | { |
795 | if (!entity->rq) { |
796 | /* This will most likely be followed by missing frames |
797 | * or worse--a blank screen--leave a trail in the |
		 * logs, so this can be debugged more easily.
		 */
		drm_err(job->sched, "%s: entity has no rq!\n", __func__);
801 | return -ENOENT; |
802 | } |
803 | |
804 | if (unlikely(!credits)) { |
		pr_err("*ERROR* %s: credits cannot be 0!\n", __func__);
806 | return -EINVAL; |
807 | } |
808 | |
809 | job->entity = entity; |
810 | job->credits = credits; |
	job->s_fence = drm_sched_fence_alloc(entity, owner);
812 | if (!job->s_fence) |
813 | return -ENOMEM; |
814 | |
	INIT_LIST_HEAD(&job->list);
816 | |
	xa_init_flags(&job->dependencies, XA_FLAGS_ALLOC);
818 | |
819 | return 0; |
820 | } |
821 | EXPORT_SYMBOL(drm_sched_job_init); |
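
/*
 * A minimal sketch of the expected submission sequence, assuming the driver
 * has already set up its scheduler and entity (my_job and my_validate() are
 * hypothetical; one credit and @owner == my_job are illustrative choices):
 *
 *	ret = drm_sched_job_init(&my_job->base, entity, 1, my_job);
 *	if (ret)
 *		return ret;
 *
 *	ret = my_validate(my_job);
 *	if (ret) {
 *		drm_sched_job_cleanup(&my_job->base);
 *		return ret;
 *	}
 *
 *	drm_sched_job_arm(&my_job->base);
 *	drm_sched_entity_push_job(&my_job->base);
 *
 * Dependencies are typically added with the drm_sched_job_add_*dependency()
 * helpers below before the job is pushed to the entity.
 */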
822 | |
823 | /** |
824 | * drm_sched_job_arm - arm a scheduler job for execution |
825 | * @job: scheduler job to arm |
826 | * |
827 | * This arms a scheduler job for execution. Specifically it initializes the |
828 | * &drm_sched_job.s_fence of @job, so that it can be attached to struct dma_resv |
829 | * or other places that need to track the completion of this job. |
830 | * |
831 | * Refer to drm_sched_entity_push_job() documentation for locking |
832 | * considerations. |
833 | * |
834 | * This can only be called if drm_sched_job_init() succeeded. |
835 | */ |
836 | void drm_sched_job_arm(struct drm_sched_job *job) |
837 | { |
838 | struct drm_gpu_scheduler *sched; |
839 | struct drm_sched_entity *entity = job->entity; |
840 | |
841 | BUG_ON(!entity); |
842 | drm_sched_entity_select_rq(entity); |
843 | sched = entity->rq->sched; |
844 | |
845 | job->sched = sched; |
846 | job->s_priority = entity->priority; |
	job->id = atomic64_inc_return(&sched->job_id_count);
848 | |
	drm_sched_fence_init(job->s_fence, job->entity);
850 | } |
851 | EXPORT_SYMBOL(drm_sched_job_arm); |
852 | |
853 | /** |
854 | * drm_sched_job_add_dependency - adds the fence as a job dependency |
855 | * @job: scheduler job to add the dependencies to |
856 | * @fence: the dma_fence to add to the list of dependencies. |
857 | * |
858 | * Note that @fence is consumed in both the success and error cases. |
859 | * |
860 | * Returns: |
861 | * 0 on success, or an error on failing to expand the array. |
862 | */ |
863 | int drm_sched_job_add_dependency(struct drm_sched_job *job, |
864 | struct dma_fence *fence) |
865 | { |
866 | struct dma_fence *entry; |
867 | unsigned long index; |
868 | u32 id = 0; |
869 | int ret; |
870 | |
871 | if (!fence) |
872 | return 0; |
873 | |
874 | /* Deduplicate if we already depend on a fence from the same context. |
875 | * This lets the size of the array of deps scale with the number of |
876 | * engines involved, rather than the number of BOs. |
877 | */ |
878 | xa_for_each(&job->dependencies, index, entry) { |
879 | if (entry->context != fence->context) |
880 | continue; |
881 | |
		if (dma_fence_is_later(fence, entry)) {
			dma_fence_put(entry);
			xa_store(&job->dependencies, index, fence, GFP_KERNEL);
885 | } else { |
886 | dma_fence_put(fence); |
887 | } |
888 | return 0; |
889 | } |
890 | |
	ret = xa_alloc(&job->dependencies, &id, fence, xa_limit_32b, GFP_KERNEL);
892 | if (ret != 0) |
893 | dma_fence_put(fence); |
894 | |
895 | return ret; |
896 | } |
897 | EXPORT_SYMBOL(drm_sched_job_add_dependency); |
898 | |
899 | /** |
900 | * drm_sched_job_add_syncobj_dependency - adds a syncobj's fence as a job dependency |
901 | * @job: scheduler job to add the dependencies to |
902 | * @file: drm file private pointer |
903 | * @handle: syncobj handle to lookup |
904 | * @point: timeline point |
905 | * |
906 | * This adds the fence matching the given syncobj to @job. |
907 | * |
908 | * Returns: |
909 | * 0 on success, or an error on failing to expand the array. |
910 | */ |
911 | int drm_sched_job_add_syncobj_dependency(struct drm_sched_job *job, |
912 | struct drm_file *file, |
913 | u32 handle, |
914 | u32 point) |
915 | { |
916 | struct dma_fence *fence; |
917 | int ret; |
918 | |
	ret = drm_syncobj_find_fence(file, handle, point, 0, &fence);
920 | if (ret) |
921 | return ret; |
922 | |
923 | return drm_sched_job_add_dependency(job, fence); |
924 | } |
925 | EXPORT_SYMBOL(drm_sched_job_add_syncobj_dependency); |
926 | |
927 | /** |
928 | * drm_sched_job_add_resv_dependencies - add all fences from the resv to the job |
929 | * @job: scheduler job to add the dependencies to |
930 | * @resv: the dma_resv object to get the fences from |
931 | * @usage: the dma_resv_usage to use to filter the fences |
932 | * |
933 | * This adds all fences matching the given usage from @resv to @job. |
934 | * Must be called with the @resv lock held. |
935 | * |
936 | * Returns: |
937 | * 0 on success, or an error on failing to expand the array. |
938 | */ |
939 | int drm_sched_job_add_resv_dependencies(struct drm_sched_job *job, |
940 | struct dma_resv *resv, |
941 | enum dma_resv_usage usage) |
942 | { |
943 | struct dma_resv_iter cursor; |
944 | struct dma_fence *fence; |
945 | int ret; |
946 | |
947 | dma_resv_assert_held(resv); |
948 | |
949 | dma_resv_for_each_fence(&cursor, resv, usage, fence) { |
950 | /* Make sure to grab an additional ref on the added fence */ |
951 | dma_fence_get(fence); |
952 | ret = drm_sched_job_add_dependency(job, fence); |
953 | if (ret) { |
954 | dma_fence_put(fence); |
955 | return ret; |
956 | } |
957 | } |
958 | return 0; |
959 | } |
960 | EXPORT_SYMBOL(drm_sched_job_add_resv_dependencies); |
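
/*
 * For instance, a driver synchronizing a job against all previous users of a
 * buffer might, with obj->resv already locked, do something along these lines
 * (a sketch only; "obj" is a hypothetical GEM object and the job is assumed
 * to write the buffer):
 *
 *	ret = drm_sched_job_add_resv_dependencies(&my_job->base, obj->resv,
 *						   dma_resv_usage_rw(true));
 *
 * which is essentially what drm_sched_job_add_implicit_dependencies() below
 * does for GEM objects.
 */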
961 | |
962 | /** |
963 | * drm_sched_job_add_implicit_dependencies - adds implicit dependencies as job |
964 | * dependencies |
965 | * @job: scheduler job to add the dependencies to |
966 | * @obj: the gem object to add new dependencies from. |
967 | * @write: whether the job might write the object (so we need to depend on |
968 | * shared fences in the reservation object). |
969 | * |
970 | * This should be called after drm_gem_lock_reservations() on your array of |
971 | * GEM objects used in the job but before updating the reservations with your |
972 | * own fences. |
973 | * |
974 | * Returns: |
975 | * 0 on success, or an error on failing to expand the array. |
976 | */ |
977 | int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job, |
978 | struct drm_gem_object *obj, |
979 | bool write) |
980 | { |
981 | return drm_sched_job_add_resv_dependencies(job, obj->resv, |
982 | dma_resv_usage_rw(write)); |
983 | } |
984 | EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies); |
985 | |
986 | /** |
987 | * drm_sched_job_cleanup - clean up scheduler job resources |
988 | * @job: scheduler job to clean up |
989 | * |
990 | * Cleans up the resources allocated with drm_sched_job_init(). |
991 | * |
992 | * Drivers should call this from their error unwind code if @job is aborted |
993 | * before drm_sched_job_arm() is called. |
994 | * |
995 | * After that point of no return @job is committed to be executed by the |
996 | * scheduler, and this function should be called from the |
997 | * &drm_sched_backend_ops.free_job callback. |
998 | */ |
999 | void drm_sched_job_cleanup(struct drm_sched_job *job) |
1000 | { |
1001 | struct dma_fence *fence; |
1002 | unsigned long index; |
1003 | |
	if (kref_read(&job->s_fence->finished.refcount)) {
		/* drm_sched_job_arm() has been called */
		dma_fence_put(&job->s_fence->finished);
	} else {
		/* aborted job before committing to run it */
		drm_sched_fence_free(job->s_fence);
1010 | } |
1011 | |
1012 | job->s_fence = NULL; |
1013 | |
1014 | xa_for_each(&job->dependencies, index, fence) { |
1015 | dma_fence_put(fence); |
1016 | } |
1017 | xa_destroy(&job->dependencies); |
1018 | |
1019 | } |
1020 | EXPORT_SYMBOL(drm_sched_job_cleanup); |
1021 | |
1022 | /** |
1023 | * drm_sched_wakeup - Wake up the scheduler if it is ready to queue |
1024 | * @sched: scheduler instance |
1025 | * @entity: the scheduler entity |
1026 | * |
1027 | * Wake up the scheduler if we can queue jobs. |
1028 | */ |
1029 | void drm_sched_wakeup(struct drm_gpu_scheduler *sched, |
1030 | struct drm_sched_entity *entity) |
1031 | { |
1032 | if (drm_sched_can_queue(sched, entity)) |
1033 | drm_sched_run_job_queue(sched); |
1034 | } |
1035 | |
1036 | /** |
1037 | * drm_sched_select_entity - Select next entity to process |
1038 | * |
1039 | * @sched: scheduler instance |
1040 | * |
1041 | * Return an entity to process or NULL if none are found. |
1042 | * |
 * Note that we break out of the for-loop when "entity" is non-null, which can
 * also be an error-pointer--this ensures we don't process lower priority
1045 | * run-queues. See comments in the respectively called functions. |
1046 | */ |
1047 | static struct drm_sched_entity * |
1048 | drm_sched_select_entity(struct drm_gpu_scheduler *sched) |
1049 | { |
1050 | struct drm_sched_entity *entity; |
1051 | int i; |
1052 | |
1053 | /* Start with the highest priority. |
1054 | */ |
1055 | for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { |
		entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ?
			drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) :
			drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]);
1059 | if (entity) |
1060 | break; |
1061 | } |
1062 | |
	return IS_ERR(entity) ? NULL : entity;
1064 | } |
1065 | |
1066 | /** |
1067 | * drm_sched_get_finished_job - fetch the next finished job to be destroyed |
1068 | * |
1069 | * @sched: scheduler instance |
1070 | * |
1071 | * Returns the next finished job from the pending list (if there is one) |
1072 | * ready for it to be destroyed. |
1073 | */ |
1074 | static struct drm_sched_job * |
1075 | drm_sched_get_finished_job(struct drm_gpu_scheduler *sched) |
1076 | { |
1077 | struct drm_sched_job *job, *next; |
1078 | |
	spin_lock(&sched->job_list_lock);
1080 | |
1081 | job = list_first_entry_or_null(&sched->pending_list, |
1082 | struct drm_sched_job, list); |
1083 | |
	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
		/* remove job from pending_list */
		list_del_init(&job->list);

		/* cancel this job's TO timer */
		cancel_delayed_work(&sched->work_tdr);
1090 | /* make the scheduled timestamp more accurate */ |
1091 | next = list_first_entry_or_null(&sched->pending_list, |
1092 | typeof(*next), list); |
1093 | |
1094 | if (next) { |
1095 | if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, |
1096 | &next->s_fence->scheduled.flags)) |
1097 | next->s_fence->scheduled.timestamp = |
				dma_fence_timestamp(&job->s_fence->finished);
1099 | /* start TO timer for next job */ |
1100 | drm_sched_start_timeout(sched); |
1101 | } |
1102 | } else { |
1103 | job = NULL; |
1104 | } |
1105 | |
	spin_unlock(&sched->job_list_lock);
1107 | |
1108 | return job; |
1109 | } |
1110 | |
1111 | /** |
1112 | * drm_sched_pick_best - Get a drm sched from a sched_list with the least load |
1113 | * @sched_list: list of drm_gpu_schedulers |
1114 | * @num_sched_list: number of drm_gpu_schedulers in the sched_list |
1115 | * |
1116 | * Returns pointer of the sched with the least load or NULL if none of the |
1117 | * drm_gpu_schedulers are ready |
1118 | */ |
1119 | struct drm_gpu_scheduler * |
1120 | drm_sched_pick_best(struct drm_gpu_scheduler **sched_list, |
1121 | unsigned int num_sched_list) |
1122 | { |
1123 | struct drm_gpu_scheduler *sched, *picked_sched = NULL; |
1124 | int i; |
1125 | unsigned int min_score = UINT_MAX, num_score; |
1126 | |
1127 | for (i = 0; i < num_sched_list; ++i) { |
1128 | sched = sched_list[i]; |
1129 | |
1130 | if (!sched->ready) { |
			DRM_WARN("scheduler %s is not ready, skipping",
1132 | sched->name); |
1133 | continue; |
1134 | } |
1135 | |
		num_score = atomic_read(sched->score);
1137 | if (num_score < min_score) { |
1138 | min_score = num_score; |
1139 | picked_sched = sched; |
1140 | } |
1141 | } |
1142 | |
1143 | return picked_sched; |
1144 | } |
1145 | EXPORT_SYMBOL(drm_sched_pick_best); |
1146 | |
1147 | /** |
1148 | * drm_sched_free_job_work - worker to call free_job |
1149 | * |
1150 | * @w: free job work |
1151 | */ |
1152 | static void drm_sched_free_job_work(struct work_struct *w) |
1153 | { |
1154 | struct drm_gpu_scheduler *sched = |
1155 | container_of(w, struct drm_gpu_scheduler, work_free_job); |
1156 | struct drm_sched_job *job; |
1157 | |
1158 | if (READ_ONCE(sched->pause_submit)) |
1159 | return; |
1160 | |
1161 | job = drm_sched_get_finished_job(sched); |
1162 | if (job) |
1163 | sched->ops->free_job(job); |
1164 | |
1165 | drm_sched_run_free_queue(sched); |
1166 | drm_sched_run_job_queue(sched); |
1167 | } |
1168 | |
1169 | /** |
1170 | * drm_sched_run_job_work - worker to call run_job |
1171 | * |
1172 | * @w: run job work |
1173 | */ |
1174 | static void drm_sched_run_job_work(struct work_struct *w) |
1175 | { |
1176 | struct drm_gpu_scheduler *sched = |
1177 | container_of(w, struct drm_gpu_scheduler, work_run_job); |
1178 | struct drm_sched_entity *entity; |
1179 | struct dma_fence *fence; |
1180 | struct drm_sched_fence *s_fence; |
1181 | struct drm_sched_job *sched_job; |
1182 | int r; |
1183 | |
1184 | if (READ_ONCE(sched->pause_submit)) |
1185 | return; |
1186 | |
1187 | /* Find entity with a ready job */ |
1188 | entity = drm_sched_select_entity(sched); |
1189 | if (!entity) |
1190 | return; /* No more work */ |
1191 | |
1192 | sched_job = drm_sched_entity_pop_job(entity); |
1193 | if (!sched_job) { |
1194 | complete_all(&entity->entity_idle); |
1195 | drm_sched_run_job_queue(sched); |
1196 | return; |
1197 | } |
1198 | |
1199 | s_fence = sched_job->s_fence; |
1200 | |
	atomic_add(sched_job->credits, &sched->credit_count);
	drm_sched_job_begin(sched_job);
1203 | |
1204 | trace_drm_run_job(sched_job, entity); |
1205 | fence = sched->ops->run_job(sched_job); |
1206 | complete_all(&entity->entity_idle); |
	drm_sched_fence_scheduled(s_fence, fence);
1208 | |
	if (!IS_ERR_OR_NULL(fence)) {
1210 | /* Drop for original kref_init of the fence */ |
1211 | dma_fence_put(fence); |
1212 | |
		r = dma_fence_add_callback(fence, &sched_job->cb,
					   drm_sched_job_done_cb);
		if (r == -ENOENT)
			drm_sched_job_done(sched_job, fence->error);
		else if (r)
			DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r);
	} else {
		drm_sched_job_done(sched_job, IS_ERR(fence) ?
				   PTR_ERR(fence) : 0);
1222 | } |
1223 | |
1224 | wake_up(&sched->job_scheduled); |
1225 | drm_sched_run_job_queue(sched); |
1226 | } |
1227 | |
1228 | /** |
1229 | * drm_sched_init - Init a gpu scheduler instance |
1230 | * |
1231 | * @sched: scheduler instance |
1232 | * @ops: backend operations for this scheduler |
1233 | * @submit_wq: workqueue to use for submission. If NULL, an ordered wq is |
1234 | * allocated and used |
1235 | * @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT |
1236 | * @credit_limit: the number of credits this scheduler can hold from all jobs |
1237 | * @hang_limit: number of times to allow a job to hang before dropping it |
1238 | * @timeout: timeout value in jiffies for the scheduler |
1239 | * @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is |
1240 | * used |
1241 | * @score: optional score atomic shared with other schedulers |
1242 | * @name: name used for debugging |
1243 | * @dev: target &struct device |
1244 | * |
1245 | * Return 0 on success, otherwise error code. |
1246 | */ |
1247 | int drm_sched_init(struct drm_gpu_scheduler *sched, |
1248 | const struct drm_sched_backend_ops *ops, |
1249 | struct workqueue_struct *submit_wq, |
1250 | u32 num_rqs, u32 credit_limit, unsigned int hang_limit, |
1251 | long timeout, struct workqueue_struct *timeout_wq, |
1252 | atomic_t *score, const char *name, struct device *dev) |
1253 | { |
1254 | int i; |
1255 | |
1256 | sched->ops = ops; |
1257 | sched->credit_limit = credit_limit; |
1258 | sched->name = name; |
1259 | sched->timeout = timeout; |
1260 | sched->timeout_wq = timeout_wq ? : system_wq; |
1261 | sched->hang_limit = hang_limit; |
1262 | sched->score = score ? score : &sched->_score; |
1263 | sched->dev = dev; |
1264 | |
1265 | if (num_rqs > DRM_SCHED_PRIORITY_COUNT) { |
1266 | /* This is a gross violation--tell drivers what the problem is. |
1267 | */ |
		drm_err(sched, "%s: num_rqs cannot be greater than DRM_SCHED_PRIORITY_COUNT\n",
1269 | __func__); |
1270 | return -EINVAL; |
1271 | } else if (sched->sched_rq) { |
1272 | /* Not an error, but warn anyway so drivers can |
1273 | * fine-tune their DRM calling order, and return all |
1274 | * is good. |
1275 | */ |
		drm_warn(sched, "%s: scheduler already initialized!\n", __func__);
1277 | return 0; |
1278 | } |
1279 | |
1280 | if (submit_wq) { |
1281 | sched->submit_wq = submit_wq; |
1282 | sched->own_submit_wq = false; |
1283 | } else { |
1284 | sched->submit_wq = alloc_ordered_workqueue(name, 0); |
1285 | if (!sched->submit_wq) |
1286 | return -ENOMEM; |
1287 | |
1288 | sched->own_submit_wq = true; |
1289 | } |
1290 | |
	sched->sched_rq = kmalloc_array(num_rqs, sizeof(*sched->sched_rq),
					GFP_KERNEL | __GFP_ZERO);
1293 | if (!sched->sched_rq) |
1294 | goto Out_check_own; |
1295 | sched->num_rqs = num_rqs; |
1296 | for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { |
		sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL);
1298 | if (!sched->sched_rq[i]) |
1299 | goto Out_unroll; |
		drm_sched_rq_init(sched, sched->sched_rq[i]);
1301 | } |
1302 | |
1303 | init_waitqueue_head(&sched->job_scheduled); |
	INIT_LIST_HEAD(&sched->pending_list);
1305 | spin_lock_init(&sched->job_list_lock); |
	atomic_set(&sched->credit_count, 0);
1307 | INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); |
1308 | INIT_WORK(&sched->work_run_job, drm_sched_run_job_work); |
1309 | INIT_WORK(&sched->work_free_job, drm_sched_free_job_work); |
	atomic_set(&sched->_score, 0);
	atomic64_set(&sched->job_id_count, 0);
1312 | sched->pause_submit = false; |
1313 | |
1314 | sched->ready = true; |
1315 | return 0; |
1316 | Out_unroll: |
1317 | for (--i ; i >= DRM_SCHED_PRIORITY_KERNEL; i--) |
		kfree(sched->sched_rq[i]);
1319 | |
	kfree(sched->sched_rq);
1321 | sched->sched_rq = NULL; |
1322 | Out_check_own: |
1323 | if (sched->own_submit_wq) |
		destroy_workqueue(sched->submit_wq);
	drm_err(sched, "%s: Failed to setup GPU scheduler--out of memory\n", __func__);
1326 | return -ENOMEM; |
1327 | } |
1328 | EXPORT_SYMBOL(drm_sched_init); |
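
/*
 * A minimal initialization sketch, assuming one scheduler per hardware ring
 * and default workqueues (my_ring, my_sched_ops and the numeric values are
 * illustrative only):
 *
 *	ret = drm_sched_init(&my_ring->sched, &my_sched_ops, NULL,
 *			     DRM_SCHED_PRIORITY_COUNT, my_ring->size,
 *			     3, msecs_to_jiffies(10000), NULL,
 *			     NULL, my_ring->name, dev);
 */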
1329 | |
1330 | /** |
1331 | * drm_sched_fini - Destroy a gpu scheduler |
1332 | * |
1333 | * @sched: scheduler instance |
1334 | * |
1335 | * Tears down and cleans up the scheduler. |
1336 | */ |
1337 | void drm_sched_fini(struct drm_gpu_scheduler *sched) |
1338 | { |
1339 | struct drm_sched_entity *s_entity; |
1340 | int i; |
1341 | |
1342 | drm_sched_wqueue_stop(sched); |
1343 | |
1344 | for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { |
1345 | struct drm_sched_rq *rq = sched->sched_rq[i]; |
1346 | |
		spin_lock(&rq->lock);
		list_for_each_entry(s_entity, &rq->entities, list)
			/*
			 * Prevents reinsertion and marks job_queue as idle,
			 * it will be removed from the rq in
			 * drm_sched_entity_fini() eventually.
			 */
			s_entity->stopped = true;
		spin_unlock(&rq->lock);
		kfree(sched->sched_rq[i]);
1357 | } |
1358 | |
1359 | /* Wakeup everyone stuck in drm_sched_entity_flush for this scheduler */ |
1360 | wake_up_all(&sched->job_scheduled); |
1361 | |
1362 | /* Confirm no work left behind accessing device structures */ |
	cancel_delayed_work_sync(&sched->work_tdr);
1364 | |
1365 | if (sched->own_submit_wq) |
		destroy_workqueue(sched->submit_wq);
1367 | sched->ready = false; |
	kfree(sched->sched_rq);
1369 | sched->sched_rq = NULL; |
1370 | } |
1371 | EXPORT_SYMBOL(drm_sched_fini); |
1372 | |
1373 | /** |
1374 | * drm_sched_increase_karma - Update sched_entity guilty flag |
1375 | * |
1376 | * @bad: The job guilty of time out |
1377 | * |
1378 | * Increment on every hang caused by the 'bad' job. If this exceeds the hang |
1379 | * limit of the scheduler then the respective sched entity is marked guilty and |
1380 | * jobs from it will not be scheduled further |
1381 | */ |
1382 | void drm_sched_increase_karma(struct drm_sched_job *bad) |
1383 | { |
1384 | int i; |
1385 | struct drm_sched_entity *tmp; |
1386 | struct drm_sched_entity *entity; |
1387 | struct drm_gpu_scheduler *sched = bad->sched; |
1388 | |
	/* don't change @bad's karma if it's from the KERNEL RQ,
	 * because sometimes a GPU hang would cause kernel jobs (like VM
	 * updating jobs) to be corrupted, but keep in mind that kernel jobs
	 * are always considered good.
	 */
1393 | if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { |
		atomic_inc(&bad->karma);
1395 | |
1396 | for (i = DRM_SCHED_PRIORITY_HIGH; i < sched->num_rqs; i++) { |
1397 | struct drm_sched_rq *rq = sched->sched_rq[i]; |
1398 | |
			spin_lock(&rq->lock);
1400 | list_for_each_entry_safe(entity, tmp, &rq->entities, list) { |
1401 | if (bad->s_fence->scheduled.context == |
1402 | entity->fence_context) { |
1403 | if (entity->guilty) |
						atomic_set(entity->guilty, 1);
1405 | break; |
1406 | } |
1407 | } |
			spin_unlock(&rq->lock);
1409 | if (&entity->list != &rq->entities) |
1410 | break; |
1411 | } |
1412 | } |
1413 | } |
1414 | EXPORT_SYMBOL(drm_sched_increase_karma); |
1415 | |
1416 | /** |
1417 | * drm_sched_wqueue_ready - Is the scheduler ready for submission |
1418 | * |
1419 | * @sched: scheduler instance |
1420 | * |
1421 | * Returns true if submission is ready |
1422 | */ |
1423 | bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched) |
1424 | { |
1425 | return sched->ready; |
1426 | } |
1427 | EXPORT_SYMBOL(drm_sched_wqueue_ready); |
1428 | |
1429 | /** |
1430 | * drm_sched_wqueue_stop - stop scheduler submission |
1431 | * |
1432 | * @sched: scheduler instance |
1433 | */ |
1434 | void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched) |
1435 | { |
1436 | WRITE_ONCE(sched->pause_submit, true); |
	cancel_work_sync(&sched->work_run_job);
	cancel_work_sync(&sched->work_free_job);
1439 | } |
1440 | EXPORT_SYMBOL(drm_sched_wqueue_stop); |
1441 | |
1442 | /** |
1443 | * drm_sched_wqueue_start - start scheduler submission |
1444 | * |
1445 | * @sched: scheduler instance |
1446 | */ |
1447 | void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched) |
1448 | { |
1449 | WRITE_ONCE(sched->pause_submit, false); |
	queue_work(sched->submit_wq, &sched->work_run_job);
	queue_work(sched->submit_wq, &sched->work_free_job);
1452 | } |
1453 | EXPORT_SYMBOL(drm_sched_wqueue_start); |
1454 | |