1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ |
3 | /* Copyright 2019 Collabora ltd. */ |
4 | #include <linux/delay.h> |
5 | #include <linux/interrupt.h> |
6 | #include <linux/io.h> |
7 | #include <linux/iopoll.h> |
8 | #include <linux/platform_device.h> |
9 | #include <linux/pm_runtime.h> |
10 | #include <linux/dma-resv.h> |
11 | #include <drm/gpu_scheduler.h> |
12 | #include <drm/panfrost_drm.h> |
13 | |
14 | #include "panfrost_device.h" |
15 | #include "panfrost_devfreq.h" |
16 | #include "panfrost_job.h" |
17 | #include "panfrost_features.h" |
18 | #include "panfrost_issues.h" |
19 | #include "panfrost_gem.h" |
20 | #include "panfrost_regs.h" |
21 | #include "panfrost_gpu.h" |
22 | #include "panfrost_mmu.h" |
23 | #include "panfrost_dump.h" |
24 | |
/* Timeout, in milliseconds, handed to the DRM scheduler for each job queue. */
#define JOB_TIMEOUT_MS 500

/*
 * MMIO accessors for the job manager registers.
 *
 * @dev:  pointer to an object exposing an ->iomem register base
 * @reg:  byte offset of the register from that base
 * @data: 32-bit value to write (job_write() only)
 *
 * Every macro argument is parenthesized so that callers may pass arbitrary
 * expressions without operator-precedence surprises.
 */
#define job_write(dev, reg, data) writel((data), (dev)->iomem + (reg))
#define job_read(dev, reg) readl((dev)->iomem + (reg))
29 | |
30 | struct panfrost_queue_state { |
31 | struct drm_gpu_scheduler sched; |
32 | u64 fence_context; |
33 | u64 emit_seqno; |
34 | }; |
35 | |
36 | struct panfrost_job_slot { |
37 | struct panfrost_queue_state queue[NUM_JOB_SLOTS]; |
38 | spinlock_t job_lock; |
39 | int irq; |
40 | }; |
41 | |
42 | static struct panfrost_job * |
43 | to_panfrost_job(struct drm_sched_job *sched_job) |
44 | { |
45 | return container_of(sched_job, struct panfrost_job, base); |
46 | } |
47 | |
48 | struct panfrost_fence { |
49 | struct dma_fence base; |
50 | struct drm_device *dev; |
51 | /* panfrost seqno for signaled() test */ |
52 | u64 seqno; |
53 | int queue; |
54 | }; |
55 | |
56 | static inline struct panfrost_fence * |
57 | to_panfrost_fence(struct dma_fence *fence) |
58 | { |
59 | return (struct panfrost_fence *)fence; |
60 | } |
61 | |
/* dma_fence_ops.get_driver_name: constant driver name, @fence is unused. */
static const char *panfrost_fence_get_driver_name(struct dma_fence *fence)
{
	static const char driver_name[] = "panfrost";

	return driver_name;
}
66 | |
67 | static const char *panfrost_fence_get_timeline_name(struct dma_fence *fence) |
68 | { |
69 | struct panfrost_fence *f = to_panfrost_fence(fence); |
70 | |
71 | switch (f->queue) { |
72 | case 0: |
73 | return "panfrost-js-0" ; |
74 | case 1: |
75 | return "panfrost-js-1" ; |
76 | case 2: |
77 | return "panfrost-js-2" ; |
78 | default: |
79 | return NULL; |
80 | } |
81 | } |
82 | |
83 | static const struct dma_fence_ops panfrost_fence_ops = { |
84 | .get_driver_name = panfrost_fence_get_driver_name, |
85 | .get_timeline_name = panfrost_fence_get_timeline_name, |
86 | }; |
87 | |
88 | static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, int js_num) |
89 | { |
90 | struct panfrost_fence *fence; |
91 | struct panfrost_job_slot *js = pfdev->js; |
92 | |
93 | fence = kzalloc(size: sizeof(*fence), GFP_KERNEL); |
94 | if (!fence) |
95 | return ERR_PTR(error: -ENOMEM); |
96 | |
97 | fence->dev = pfdev->ddev; |
98 | fence->queue = js_num; |
99 | fence->seqno = ++js->queue[js_num].emit_seqno; |
100 | dma_fence_init(fence: &fence->base, ops: &panfrost_fence_ops, lock: &js->job_lock, |
101 | context: js->queue[js_num].fence_context, seqno: fence->seqno); |
102 | |
103 | return &fence->base; |
104 | } |
105 | |
106 | int panfrost_job_get_slot(struct panfrost_job *job) |
107 | { |
108 | /* JS0: fragment jobs. |
109 | * JS1: vertex/tiler jobs |
110 | * JS2: compute jobs |
111 | */ |
112 | if (job->requirements & PANFROST_JD_REQ_FS) |
113 | return 0; |
114 | |
115 | /* Not exposed to userspace yet */ |
116 | #if 0 |
117 | if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) { |
118 | if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) && |
119 | (job->pfdev->features.nr_core_groups == 2)) |
120 | return 2; |
121 | if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987)) |
122 | return 2; |
123 | } |
124 | #endif |
125 | return 1; |
126 | } |
127 | |
128 | static void panfrost_job_write_affinity(struct panfrost_device *pfdev, |
129 | u32 requirements, |
130 | int js) |
131 | { |
132 | u64 affinity; |
133 | |
134 | /* |
135 | * Use all cores for now. |
136 | * Eventually we may need to support tiler only jobs and h/w with |
137 | * multiple (2) coherent core groups |
138 | */ |
139 | affinity = pfdev->features.shader_present; |
140 | |
141 | job_write(pfdev, JS_AFFINITY_NEXT_LO(js), lower_32_bits(affinity)); |
142 | job_write(pfdev, JS_AFFINITY_NEXT_HI(js), upper_32_bits(affinity)); |
143 | } |
144 | |
145 | static u32 |
146 | panfrost_get_job_chain_flag(const struct panfrost_job *job) |
147 | { |
148 | struct panfrost_fence *f = to_panfrost_fence(fence: job->done_fence); |
149 | |
150 | if (!panfrost_has_hw_feature(pfdev: job->pfdev, feat: HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) |
151 | return 0; |
152 | |
153 | return (f->seqno & 1) ? JS_CONFIG_JOB_CHAIN_FLAG : 0; |
154 | } |
155 | |
156 | static struct panfrost_job * |
157 | panfrost_dequeue_job(struct panfrost_device *pfdev, int slot) |
158 | { |
159 | struct panfrost_job *job = pfdev->jobs[slot][0]; |
160 | |
161 | WARN_ON(!job); |
162 | if (job->is_profiled) { |
163 | if (job->engine_usage) { |
164 | job->engine_usage->elapsed_ns[slot] += |
165 | ktime_to_ns(ktime_sub(ktime_get(), job->start_time)); |
166 | job->engine_usage->cycles[slot] += |
167 | panfrost_cycle_counter_read(pfdev) - job->start_cycles; |
168 | } |
169 | panfrost_cycle_counter_put(pfdev: job->pfdev); |
170 | } |
171 | |
172 | pfdev->jobs[slot][0] = pfdev->jobs[slot][1]; |
173 | pfdev->jobs[slot][1] = NULL; |
174 | |
175 | return job; |
176 | } |
177 | |
178 | static unsigned int |
179 | panfrost_enqueue_job(struct panfrost_device *pfdev, int slot, |
180 | struct panfrost_job *job) |
181 | { |
182 | if (WARN_ON(!job)) |
183 | return 0; |
184 | |
185 | if (!pfdev->jobs[slot][0]) { |
186 | pfdev->jobs[slot][0] = job; |
187 | return 0; |
188 | } |
189 | |
190 | WARN_ON(pfdev->jobs[slot][1]); |
191 | pfdev->jobs[slot][1] = job; |
192 | WARN_ON(panfrost_get_job_chain_flag(job) == |
193 | panfrost_get_job_chain_flag(pfdev->jobs[slot][0])); |
194 | return 1; |
195 | } |
196 | |
197 | static void panfrost_job_hw_submit(struct panfrost_job *job, int js) |
198 | { |
199 | struct panfrost_device *pfdev = job->pfdev; |
200 | unsigned int subslot; |
201 | u32 cfg; |
202 | u64 jc_head = job->jc; |
203 | int ret; |
204 | |
205 | panfrost_devfreq_record_busy(devfreq: &pfdev->pfdevfreq); |
206 | |
207 | ret = pm_runtime_get_sync(dev: pfdev->dev); |
208 | if (ret < 0) |
209 | return; |
210 | |
211 | if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js)))) { |
212 | return; |
213 | } |
214 | |
215 | cfg = panfrost_mmu_as_get(pfdev, mmu: job->mmu); |
216 | |
217 | job_write(pfdev, JS_HEAD_NEXT_LO(js), lower_32_bits(jc_head)); |
218 | job_write(pfdev, JS_HEAD_NEXT_HI(js), upper_32_bits(jc_head)); |
219 | |
220 | panfrost_job_write_affinity(pfdev, requirements: job->requirements, js); |
221 | |
222 | /* start MMU, medium priority, cache clean/flush on end, clean/flush on |
223 | * start */ |
224 | cfg |= JS_CONFIG_THREAD_PRI(8) | |
225 | JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE | |
226 | JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE | |
227 | panfrost_get_job_chain_flag(job); |
228 | |
229 | if (panfrost_has_hw_feature(pfdev, feat: HW_FEATURE_FLUSH_REDUCTION)) |
230 | cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; |
231 | |
232 | if (panfrost_has_hw_issue(pfdev, issue: HW_ISSUE_10649)) |
233 | cfg |= JS_CONFIG_START_MMU; |
234 | |
235 | job_write(pfdev, JS_CONFIG_NEXT(js), cfg); |
236 | |
237 | if (panfrost_has_hw_feature(pfdev, feat: HW_FEATURE_FLUSH_REDUCTION)) |
238 | job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id); |
239 | |
240 | /* GO ! */ |
241 | |
242 | spin_lock(lock: &pfdev->js->job_lock); |
243 | subslot = panfrost_enqueue_job(pfdev, slot: js, job); |
244 | /* Don't queue the job if a reset is in progress */ |
245 | if (!atomic_read(v: &pfdev->reset.pending)) { |
246 | if (atomic_read(v: &pfdev->profile_mode)) { |
247 | panfrost_cycle_counter_get(pfdev); |
248 | job->is_profiled = true; |
249 | job->start_time = ktime_get(); |
250 | job->start_cycles = panfrost_cycle_counter_read(pfdev); |
251 | } |
252 | |
253 | job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START); |
254 | dev_dbg(pfdev->dev, |
255 | "JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d" , |
256 | job, js, subslot, jc_head, cfg & 0xf); |
257 | } |
258 | spin_unlock(lock: &pfdev->js->job_lock); |
259 | } |
260 | |
261 | static int panfrost_acquire_object_fences(struct drm_gem_object **bos, |
262 | int bo_count, |
263 | struct drm_sched_job *job) |
264 | { |
265 | int i, ret; |
266 | |
267 | for (i = 0; i < bo_count; i++) { |
268 | ret = dma_resv_reserve_fences(obj: bos[i]->resv, num_fences: 1); |
269 | if (ret) |
270 | return ret; |
271 | |
272 | /* panfrost always uses write mode in its current uapi */ |
273 | ret = drm_sched_job_add_implicit_dependencies(job, obj: bos[i], |
274 | write: true); |
275 | if (ret) |
276 | return ret; |
277 | } |
278 | |
279 | return 0; |
280 | } |
281 | |
282 | static void panfrost_attach_object_fences(struct drm_gem_object **bos, |
283 | int bo_count, |
284 | struct dma_fence *fence) |
285 | { |
286 | int i; |
287 | |
288 | for (i = 0; i < bo_count; i++) |
289 | dma_resv_add_fence(obj: bos[i]->resv, fence, usage: DMA_RESV_USAGE_WRITE); |
290 | } |
291 | |
292 | int panfrost_job_push(struct panfrost_job *job) |
293 | { |
294 | struct panfrost_device *pfdev = job->pfdev; |
295 | struct ww_acquire_ctx acquire_ctx; |
296 | int ret = 0; |
297 | |
298 | ret = drm_gem_lock_reservations(objs: job->bos, count: job->bo_count, |
299 | acquire_ctx: &acquire_ctx); |
300 | if (ret) |
301 | return ret; |
302 | |
303 | mutex_lock(&pfdev->sched_lock); |
304 | drm_sched_job_arm(job: &job->base); |
305 | |
306 | job->render_done_fence = dma_fence_get(fence: &job->base.s_fence->finished); |
307 | |
308 | ret = panfrost_acquire_object_fences(bos: job->bos, bo_count: job->bo_count, |
309 | job: &job->base); |
310 | if (ret) { |
311 | mutex_unlock(lock: &pfdev->sched_lock); |
312 | goto unlock; |
313 | } |
314 | |
315 | kref_get(kref: &job->refcount); /* put by scheduler job completion */ |
316 | |
317 | drm_sched_entity_push_job(sched_job: &job->base); |
318 | |
319 | mutex_unlock(lock: &pfdev->sched_lock); |
320 | |
321 | panfrost_attach_object_fences(bos: job->bos, bo_count: job->bo_count, |
322 | fence: job->render_done_fence); |
323 | |
324 | unlock: |
325 | drm_gem_unlock_reservations(objs: job->bos, count: job->bo_count, acquire_ctx: &acquire_ctx); |
326 | |
327 | return ret; |
328 | } |
329 | |
330 | static void panfrost_job_cleanup(struct kref *ref) |
331 | { |
332 | struct panfrost_job *job = container_of(ref, struct panfrost_job, |
333 | refcount); |
334 | unsigned int i; |
335 | |
336 | dma_fence_put(fence: job->done_fence); |
337 | dma_fence_put(fence: job->render_done_fence); |
338 | |
339 | if (job->mappings) { |
340 | for (i = 0; i < job->bo_count; i++) { |
341 | if (!job->mappings[i]) |
342 | break; |
343 | |
344 | atomic_dec(v: &job->mappings[i]->obj->gpu_usecount); |
345 | panfrost_gem_mapping_put(mapping: job->mappings[i]); |
346 | } |
347 | kvfree(addr: job->mappings); |
348 | } |
349 | |
350 | if (job->bos) { |
351 | for (i = 0; i < job->bo_count; i++) |
352 | drm_gem_object_put(obj: job->bos[i]); |
353 | |
354 | kvfree(addr: job->bos); |
355 | } |
356 | |
357 | kfree(objp: job); |
358 | } |
359 | |
360 | void panfrost_job_put(struct panfrost_job *job) |
361 | { |
362 | kref_put(kref: &job->refcount, release: panfrost_job_cleanup); |
363 | } |
364 | |
/*
 * Scheduler .free_job hook: tear down the scheduler side of the job, then
 * drop a reference on the panfrost job embedding it.
 */
static void panfrost_job_free(struct drm_sched_job *sched_job)
{
	drm_sched_job_cleanup(sched_job);

	panfrost_job_put(to_panfrost_job(sched_job));
}
373 | |
374 | static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job) |
375 | { |
376 | struct panfrost_job *job = to_panfrost_job(sched_job); |
377 | struct panfrost_device *pfdev = job->pfdev; |
378 | int slot = panfrost_job_get_slot(job); |
379 | struct dma_fence *fence = NULL; |
380 | |
381 | if (unlikely(job->base.s_fence->finished.error)) |
382 | return NULL; |
383 | |
384 | /* Nothing to execute: can happen if the job has finished while |
385 | * we were resetting the GPU. |
386 | */ |
387 | if (!job->jc) |
388 | return NULL; |
389 | |
390 | fence = panfrost_fence_create(pfdev, js_num: slot); |
391 | if (IS_ERR(ptr: fence)) |
392 | return fence; |
393 | |
394 | if (job->done_fence) |
395 | dma_fence_put(fence: job->done_fence); |
396 | job->done_fence = dma_fence_get(fence); |
397 | |
398 | panfrost_job_hw_submit(job, js: slot); |
399 | |
400 | return fence; |
401 | } |
402 | |
403 | void panfrost_job_enable_interrupts(struct panfrost_device *pfdev) |
404 | { |
405 | int j; |
406 | u32 irq_mask = 0; |
407 | |
408 | for (j = 0; j < NUM_JOB_SLOTS; j++) { |
409 | irq_mask |= MK_JS_MASK(j); |
410 | } |
411 | |
412 | job_write(pfdev, JOB_INT_CLEAR, irq_mask); |
413 | job_write(pfdev, JOB_INT_MASK, irq_mask); |
414 | } |
415 | |
416 | static void panfrost_job_handle_err(struct panfrost_device *pfdev, |
417 | struct panfrost_job *job, |
418 | unsigned int js) |
419 | { |
420 | u32 js_status = job_read(pfdev, JS_STATUS(js)); |
421 | const char *exception_name = panfrost_exception_name(exception_code: js_status); |
422 | bool signal_fence = true; |
423 | |
424 | if (!panfrost_exception_is_fault(exception_code: js_status)) { |
425 | dev_dbg(pfdev->dev, "js event, js=%d, status=%s, head=0x%x, tail=0x%x" , |
426 | js, exception_name, |
427 | job_read(pfdev, JS_HEAD_LO(js)), |
428 | job_read(pfdev, JS_TAIL_LO(js))); |
429 | } else { |
430 | dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x" , |
431 | js, exception_name, |
432 | job_read(pfdev, JS_HEAD_LO(js)), |
433 | job_read(pfdev, JS_TAIL_LO(js))); |
434 | } |
435 | |
436 | if (js_status == DRM_PANFROST_EXCEPTION_STOPPED) { |
437 | /* Update the job head so we can resume */ |
438 | job->jc = job_read(pfdev, JS_TAIL_LO(js)) | |
439 | ((u64)job_read(pfdev, JS_TAIL_HI(js)) << 32); |
440 | |
441 | /* The job will be resumed, don't signal the fence */ |
442 | signal_fence = false; |
443 | } else if (js_status == DRM_PANFROST_EXCEPTION_TERMINATED) { |
444 | /* Job has been hard-stopped, flag it as canceled */ |
445 | dma_fence_set_error(fence: job->done_fence, error: -ECANCELED); |
446 | job->jc = 0; |
447 | } else if (panfrost_exception_is_fault(exception_code: js_status)) { |
448 | /* We might want to provide finer-grained error code based on |
449 | * the exception type, but unconditionally setting to EINVAL |
450 | * is good enough for now. |
451 | */ |
452 | dma_fence_set_error(fence: job->done_fence, error: -EINVAL); |
453 | job->jc = 0; |
454 | } |
455 | |
456 | panfrost_mmu_as_put(pfdev, mmu: job->mmu); |
457 | panfrost_devfreq_record_idle(devfreq: &pfdev->pfdevfreq); |
458 | |
459 | if (signal_fence) |
460 | dma_fence_signal_locked(fence: job->done_fence); |
461 | |
462 | pm_runtime_put_autosuspend(dev: pfdev->dev); |
463 | |
464 | if (panfrost_exception_needs_reset(pfdev, exception_code: js_status)) { |
465 | atomic_set(v: &pfdev->reset.pending, i: 1); |
466 | drm_sched_fault(sched: &pfdev->js->queue[js].sched); |
467 | } |
468 | } |
469 | |
470 | static void panfrost_job_handle_done(struct panfrost_device *pfdev, |
471 | struct panfrost_job *job) |
472 | { |
473 | /* Set ->jc to 0 to avoid re-submitting an already finished job (can |
474 | * happen when we receive the DONE interrupt while doing a GPU reset). |
475 | */ |
476 | job->jc = 0; |
477 | panfrost_mmu_as_put(pfdev, mmu: job->mmu); |
478 | panfrost_devfreq_record_idle(devfreq: &pfdev->pfdevfreq); |
479 | |
480 | dma_fence_signal_locked(fence: job->done_fence); |
481 | pm_runtime_put_autosuspend(dev: pfdev->dev); |
482 | } |
483 | |
484 | static void panfrost_job_handle_irq(struct panfrost_device *pfdev, u32 status) |
485 | { |
486 | struct panfrost_job *done[NUM_JOB_SLOTS][2] = {}; |
487 | struct panfrost_job *failed[NUM_JOB_SLOTS] = {}; |
488 | u32 js_state = 0, js_events = 0; |
489 | unsigned int i, j; |
490 | |
491 | /* First we collect all failed/done jobs. */ |
492 | while (status) { |
493 | u32 js_state_mask = 0; |
494 | |
495 | for (j = 0; j < NUM_JOB_SLOTS; j++) { |
496 | if (status & MK_JS_MASK(j)) |
497 | js_state_mask |= MK_JS_MASK(j); |
498 | |
499 | if (status & JOB_INT_MASK_DONE(j)) { |
500 | if (done[j][0]) |
501 | done[j][1] = panfrost_dequeue_job(pfdev, slot: j); |
502 | else |
503 | done[j][0] = panfrost_dequeue_job(pfdev, slot: j); |
504 | } |
505 | |
506 | if (status & JOB_INT_MASK_ERR(j)) { |
507 | /* Cancel the next submission. Will be submitted |
508 | * after we're done handling this failure if |
509 | * there's no reset pending. |
510 | */ |
511 | job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP); |
512 | failed[j] = panfrost_dequeue_job(pfdev, slot: j); |
513 | } |
514 | } |
515 | |
516 | /* JS_STATE is sampled when JOB_INT_CLEAR is written. |
517 | * For each BIT(slot) or BIT(slot + 16) bit written to |
518 | * JOB_INT_CLEAR, the corresponding bits in JS_STATE |
519 | * (BIT(slot) and BIT(slot + 16)) are updated, but this |
520 | * is racy. If we only have one job done at the time we |
521 | * read JOB_INT_RAWSTAT but the second job fails before we |
522 | * clear the status, we end up with a status containing |
523 | * only the DONE bit and consider both jobs as DONE since |
524 | * JS_STATE reports both NEXT and CURRENT as inactive. |
525 | * To prevent that, let's repeat this clear+read steps |
526 | * until status is 0. |
527 | */ |
528 | job_write(pfdev, JOB_INT_CLEAR, status); |
529 | js_state &= ~js_state_mask; |
530 | js_state |= job_read(pfdev, JOB_INT_JS_STATE) & js_state_mask; |
531 | js_events |= status; |
532 | status = job_read(pfdev, JOB_INT_RAWSTAT); |
533 | } |
534 | |
535 | /* Then we handle the dequeued jobs. */ |
536 | for (j = 0; j < NUM_JOB_SLOTS; j++) { |
537 | if (!(js_events & MK_JS_MASK(j))) |
538 | continue; |
539 | |
540 | if (failed[j]) { |
541 | panfrost_job_handle_err(pfdev, job: failed[j], js: j); |
542 | } else if (pfdev->jobs[j][0] && !(js_state & MK_JS_MASK(j))) { |
543 | /* When the current job doesn't fail, the JM dequeues |
544 | * the next job without waiting for an ACK, this means |
545 | * we can have 2 jobs dequeued and only catch the |
546 | * interrupt when the second one is done. If both slots |
547 | * are inactive, but one job remains in pfdev->jobs[j], |
548 | * consider it done. Of course that doesn't apply if a |
549 | * failure happened since we cancelled execution of the |
550 | * job in _NEXT (see above). |
551 | */ |
552 | if (WARN_ON(!done[j][0])) |
553 | done[j][0] = panfrost_dequeue_job(pfdev, slot: j); |
554 | else |
555 | done[j][1] = panfrost_dequeue_job(pfdev, slot: j); |
556 | } |
557 | |
558 | for (i = 0; i < ARRAY_SIZE(done[0]) && done[j][i]; i++) |
559 | panfrost_job_handle_done(pfdev, job: done[j][i]); |
560 | } |
561 | |
562 | /* And finally we requeue jobs that were waiting in the second slot |
563 | * and have been stopped if we detected a failure on the first slot. |
564 | */ |
565 | for (j = 0; j < NUM_JOB_SLOTS; j++) { |
566 | if (!(js_events & MK_JS_MASK(j))) |
567 | continue; |
568 | |
569 | if (!failed[j] || !pfdev->jobs[j][0]) |
570 | continue; |
571 | |
572 | if (pfdev->jobs[j][0]->jc == 0) { |
573 | /* The job was cancelled, signal the fence now */ |
574 | struct panfrost_job *canceled = panfrost_dequeue_job(pfdev, slot: j); |
575 | |
576 | dma_fence_set_error(fence: canceled->done_fence, error: -ECANCELED); |
577 | panfrost_job_handle_done(pfdev, job: canceled); |
578 | } else if (!atomic_read(v: &pfdev->reset.pending)) { |
579 | /* Requeue the job we removed if no reset is pending */ |
580 | job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_START); |
581 | } |
582 | } |
583 | } |
584 | |
585 | static void panfrost_job_handle_irqs(struct panfrost_device *pfdev) |
586 | { |
587 | u32 status = job_read(pfdev, JOB_INT_RAWSTAT); |
588 | |
589 | while (status) { |
590 | pm_runtime_mark_last_busy(dev: pfdev->dev); |
591 | |
592 | spin_lock(lock: &pfdev->js->job_lock); |
593 | panfrost_job_handle_irq(pfdev, status); |
594 | spin_unlock(lock: &pfdev->js->job_lock); |
595 | status = job_read(pfdev, JOB_INT_RAWSTAT); |
596 | } |
597 | } |
598 | |
599 | static u32 panfrost_active_slots(struct panfrost_device *pfdev, |
600 | u32 *js_state_mask, u32 js_state) |
601 | { |
602 | u32 rawstat; |
603 | |
604 | if (!(js_state & *js_state_mask)) |
605 | return 0; |
606 | |
607 | rawstat = job_read(pfdev, JOB_INT_RAWSTAT); |
608 | if (rawstat) { |
609 | unsigned int i; |
610 | |
611 | for (i = 0; i < NUM_JOB_SLOTS; i++) { |
612 | if (rawstat & MK_JS_MASK(i)) |
613 | *js_state_mask &= ~MK_JS_MASK(i); |
614 | } |
615 | } |
616 | |
617 | return js_state & *js_state_mask; |
618 | } |
619 | |
620 | static void |
621 | panfrost_reset(struct panfrost_device *pfdev, |
622 | struct drm_sched_job *bad) |
623 | { |
624 | u32 js_state, js_state_mask = 0xffffffff; |
625 | unsigned int i, j; |
626 | bool cookie; |
627 | int ret; |
628 | |
629 | if (!atomic_read(v: &pfdev->reset.pending)) |
630 | return; |
631 | |
632 | /* Stop the schedulers. |
633 | * |
634 | * FIXME: We temporarily get out of the dma_fence_signalling section |
635 | * because the cleanup path generate lockdep splats when taking locks |
636 | * to release job resources. We should rework the code to follow this |
637 | * pattern: |
638 | * |
639 | * try_lock |
640 | * if (locked) |
641 | * release |
642 | * else |
643 | * schedule_work_to_release_later |
644 | */ |
645 | for (i = 0; i < NUM_JOB_SLOTS; i++) |
646 | drm_sched_stop(sched: &pfdev->js->queue[i].sched, bad); |
647 | |
648 | cookie = dma_fence_begin_signalling(); |
649 | |
650 | if (bad) |
651 | drm_sched_increase_karma(bad); |
652 | |
653 | /* Mask job interrupts and synchronize to make sure we won't be |
654 | * interrupted during our reset. |
655 | */ |
656 | job_write(pfdev, JOB_INT_MASK, 0); |
657 | synchronize_irq(irq: pfdev->js->irq); |
658 | |
659 | for (i = 0; i < NUM_JOB_SLOTS; i++) { |
660 | /* Cancel the next job and soft-stop the running job. */ |
661 | job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP); |
662 | job_write(pfdev, JS_COMMAND(i), JS_COMMAND_SOFT_STOP); |
663 | } |
664 | |
665 | /* Wait at most 10ms for soft-stops to complete */ |
666 | ret = readl_poll_timeout(pfdev->iomem + JOB_INT_JS_STATE, js_state, |
667 | !panfrost_active_slots(pfdev, &js_state_mask, js_state), |
668 | 10, 10000); |
669 | |
670 | if (ret) |
671 | dev_err(pfdev->dev, "Soft-stop failed\n" ); |
672 | |
673 | /* Handle the remaining interrupts before we reset. */ |
674 | panfrost_job_handle_irqs(pfdev); |
675 | |
676 | /* Remaining interrupts have been handled, but we might still have |
677 | * stuck jobs. Let's make sure the PM counters stay balanced by |
678 | * manually calling pm_runtime_put_noidle() and |
679 | * panfrost_devfreq_record_idle() for each stuck job. |
680 | * Let's also make sure the cycle counting register's refcnt is |
681 | * kept balanced to prevent it from running forever |
682 | */ |
683 | spin_lock(lock: &pfdev->js->job_lock); |
684 | for (i = 0; i < NUM_JOB_SLOTS; i++) { |
685 | for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) { |
686 | if (pfdev->jobs[i][j]->is_profiled) |
687 | panfrost_cycle_counter_put(pfdev: pfdev->jobs[i][j]->pfdev); |
688 | pm_runtime_put_noidle(dev: pfdev->dev); |
689 | panfrost_devfreq_record_idle(devfreq: &pfdev->pfdevfreq); |
690 | } |
691 | } |
692 | memset(pfdev->jobs, 0, sizeof(pfdev->jobs)); |
693 | spin_unlock(lock: &pfdev->js->job_lock); |
694 | |
695 | /* Proceed with reset now. */ |
696 | panfrost_device_reset(pfdev); |
697 | |
698 | /* panfrost_device_reset() unmasks job interrupts, but we want to |
699 | * keep them masked a bit longer. |
700 | */ |
701 | job_write(pfdev, JOB_INT_MASK, 0); |
702 | |
703 | /* GPU has been reset, we can clear the reset pending bit. */ |
704 | atomic_set(v: &pfdev->reset.pending, i: 0); |
705 | |
706 | /* Now resubmit jobs that were previously queued but didn't have a |
707 | * chance to finish. |
708 | * FIXME: We temporarily get out of the DMA fence signalling section |
709 | * while resubmitting jobs because the job submission logic will |
710 | * allocate memory with the GFP_KERNEL flag which can trigger memory |
711 | * reclaim and exposes a lock ordering issue. |
712 | */ |
713 | dma_fence_end_signalling(cookie); |
714 | for (i = 0; i < NUM_JOB_SLOTS; i++) |
715 | drm_sched_resubmit_jobs(sched: &pfdev->js->queue[i].sched); |
716 | cookie = dma_fence_begin_signalling(); |
717 | |
718 | /* Restart the schedulers */ |
719 | for (i = 0; i < NUM_JOB_SLOTS; i++) |
720 | drm_sched_start(sched: &pfdev->js->queue[i].sched, full_recovery: true); |
721 | |
722 | /* Re-enable job interrupts now that everything has been restarted. */ |
723 | job_write(pfdev, JOB_INT_MASK, |
724 | GENMASK(16 + NUM_JOB_SLOTS - 1, 16) | |
725 | GENMASK(NUM_JOB_SLOTS - 1, 0)); |
726 | |
727 | dma_fence_end_signalling(cookie); |
728 | } |
729 | |
730 | static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job |
731 | *sched_job) |
732 | { |
733 | struct panfrost_job *job = to_panfrost_job(sched_job); |
734 | struct panfrost_device *pfdev = job->pfdev; |
735 | int js = panfrost_job_get_slot(job); |
736 | |
737 | /* |
738 | * If the GPU managed to complete this jobs fence, the timeout is |
739 | * spurious. Bail out. |
740 | */ |
741 | if (dma_fence_is_signaled(fence: job->done_fence)) |
742 | return DRM_GPU_SCHED_STAT_NOMINAL; |
743 | |
744 | /* |
745 | * Panfrost IRQ handler may take a long time to process an interrupt |
746 | * if there is another IRQ handler hogging the processing. |
747 | * For example, the HDMI encoder driver might be stuck in the IRQ |
748 | * handler for a significant time in a case of bad cable connection. |
749 | * In order to catch such cases and not report spurious Panfrost |
750 | * job timeouts, synchronize the IRQ handler and re-check the fence |
751 | * status. |
752 | */ |
753 | synchronize_irq(irq: pfdev->js->irq); |
754 | |
755 | if (dma_fence_is_signaled(fence: job->done_fence)) { |
756 | dev_warn(pfdev->dev, "unexpectedly high interrupt latency\n" ); |
757 | return DRM_GPU_SCHED_STAT_NOMINAL; |
758 | } |
759 | |
760 | dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p" , |
761 | js, |
762 | job_read(pfdev, JS_CONFIG(js)), |
763 | job_read(pfdev, JS_STATUS(js)), |
764 | job_read(pfdev, JS_HEAD_LO(js)), |
765 | job_read(pfdev, JS_TAIL_LO(js)), |
766 | sched_job); |
767 | |
768 | panfrost_core_dump(job); |
769 | |
770 | atomic_set(v: &pfdev->reset.pending, i: 1); |
771 | panfrost_reset(pfdev, bad: sched_job); |
772 | |
773 | return DRM_GPU_SCHED_STAT_NOMINAL; |
774 | } |
775 | |
776 | static void panfrost_reset_work(struct work_struct *work) |
777 | { |
778 | struct panfrost_device *pfdev; |
779 | |
780 | pfdev = container_of(work, struct panfrost_device, reset.work); |
781 | panfrost_reset(pfdev, NULL); |
782 | } |
783 | |
784 | static const struct drm_sched_backend_ops panfrost_sched_ops = { |
785 | .run_job = panfrost_job_run, |
786 | .timedout_job = panfrost_job_timedout, |
787 | .free_job = panfrost_job_free |
788 | }; |
789 | |
790 | static irqreturn_t panfrost_job_irq_handler_thread(int irq, void *data) |
791 | { |
792 | struct panfrost_device *pfdev = data; |
793 | |
794 | panfrost_job_handle_irqs(pfdev); |
795 | job_write(pfdev, JOB_INT_MASK, |
796 | GENMASK(16 + NUM_JOB_SLOTS - 1, 16) | |
797 | GENMASK(NUM_JOB_SLOTS - 1, 0)); |
798 | return IRQ_HANDLED; |
799 | } |
800 | |
801 | static irqreturn_t panfrost_job_irq_handler(int irq, void *data) |
802 | { |
803 | struct panfrost_device *pfdev = data; |
804 | u32 status = job_read(pfdev, JOB_INT_STAT); |
805 | |
806 | if (!status) |
807 | return IRQ_NONE; |
808 | |
809 | job_write(pfdev, JOB_INT_MASK, 0); |
810 | return IRQ_WAKE_THREAD; |
811 | } |
812 | |
813 | int panfrost_job_init(struct panfrost_device *pfdev) |
814 | { |
815 | struct panfrost_job_slot *js; |
816 | unsigned int nentries = 2; |
817 | int ret, j; |
818 | |
819 | /* All GPUs have two entries per queue, but without jobchain |
820 | * disambiguation stopping the right job in the close path is tricky, |
821 | * so let's just advertise one entry in that case. |
822 | */ |
823 | if (!panfrost_has_hw_feature(pfdev, feat: HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) |
824 | nentries = 1; |
825 | |
826 | pfdev->js = js = devm_kzalloc(dev: pfdev->dev, size: sizeof(*js), GFP_KERNEL); |
827 | if (!js) |
828 | return -ENOMEM; |
829 | |
830 | INIT_WORK(&pfdev->reset.work, panfrost_reset_work); |
831 | spin_lock_init(&js->job_lock); |
832 | |
833 | js->irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job" ); |
834 | if (js->irq < 0) |
835 | return js->irq; |
836 | |
837 | ret = devm_request_threaded_irq(dev: pfdev->dev, irq: js->irq, |
838 | handler: panfrost_job_irq_handler, |
839 | thread_fn: panfrost_job_irq_handler_thread, |
840 | IRQF_SHARED, KBUILD_MODNAME "-job" , |
841 | dev_id: pfdev); |
842 | if (ret) { |
843 | dev_err(pfdev->dev, "failed to request job irq" ); |
844 | return ret; |
845 | } |
846 | |
847 | pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset" , 0); |
848 | if (!pfdev->reset.wq) |
849 | return -ENOMEM; |
850 | |
851 | for (j = 0; j < NUM_JOB_SLOTS; j++) { |
852 | js->queue[j].fence_context = dma_fence_context_alloc(num: 1); |
853 | |
854 | ret = drm_sched_init(sched: &js->queue[j].sched, |
855 | ops: &panfrost_sched_ops, |
856 | num_rqs: DRM_SCHED_PRIORITY_COUNT, |
857 | hw_submission: nentries, hang_limit: 0, |
858 | timeout: msecs_to_jiffies(JOB_TIMEOUT_MS), |
859 | timeout_wq: pfdev->reset.wq, |
860 | NULL, name: "pan_js" , dev: pfdev->dev); |
861 | if (ret) { |
862 | dev_err(pfdev->dev, "Failed to create scheduler: %d." , ret); |
863 | goto err_sched; |
864 | } |
865 | } |
866 | |
867 | panfrost_job_enable_interrupts(pfdev); |
868 | |
869 | return 0; |
870 | |
871 | err_sched: |
872 | for (j--; j >= 0; j--) |
873 | drm_sched_fini(sched: &js->queue[j].sched); |
874 | |
875 | destroy_workqueue(wq: pfdev->reset.wq); |
876 | return ret; |
877 | } |
878 | |
879 | void panfrost_job_fini(struct panfrost_device *pfdev) |
880 | { |
881 | struct panfrost_job_slot *js = pfdev->js; |
882 | int j; |
883 | |
884 | job_write(pfdev, JOB_INT_MASK, 0); |
885 | |
886 | for (j = 0; j < NUM_JOB_SLOTS; j++) { |
887 | drm_sched_fini(sched: &js->queue[j].sched); |
888 | } |
889 | |
890 | cancel_work_sync(work: &pfdev->reset.work); |
891 | destroy_workqueue(wq: pfdev->reset.wq); |
892 | } |
893 | |
894 | int panfrost_job_open(struct panfrost_file_priv *panfrost_priv) |
895 | { |
896 | struct panfrost_device *pfdev = panfrost_priv->pfdev; |
897 | struct panfrost_job_slot *js = pfdev->js; |
898 | struct drm_gpu_scheduler *sched; |
899 | int ret, i; |
900 | |
901 | for (i = 0; i < NUM_JOB_SLOTS; i++) { |
902 | sched = &js->queue[i].sched; |
903 | ret = drm_sched_entity_init(entity: &panfrost_priv->sched_entity[i], |
904 | priority: DRM_SCHED_PRIORITY_NORMAL, sched_list: &sched, |
905 | num_sched_list: 1, NULL); |
906 | if (WARN_ON(ret)) |
907 | return ret; |
908 | } |
909 | return 0; |
910 | } |
911 | |
912 | void panfrost_job_close(struct panfrost_file_priv *panfrost_priv) |
913 | { |
914 | struct panfrost_device *pfdev = panfrost_priv->pfdev; |
915 | int i; |
916 | |
917 | for (i = 0; i < NUM_JOB_SLOTS; i++) |
918 | drm_sched_entity_destroy(entity: &panfrost_priv->sched_entity[i]); |
919 | |
920 | /* Kill in-flight jobs */ |
921 | spin_lock(lock: &pfdev->js->job_lock); |
922 | for (i = 0; i < NUM_JOB_SLOTS; i++) { |
923 | struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i]; |
924 | int j; |
925 | |
926 | for (j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) { |
927 | struct panfrost_job *job = pfdev->jobs[i][j]; |
928 | u32 cmd; |
929 | |
930 | if (!job || job->base.entity != entity) |
931 | continue; |
932 | |
933 | if (j == 1) { |
934 | /* Try to cancel the job before it starts */ |
935 | job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP); |
936 | /* Reset the job head so it doesn't get restarted if |
937 | * the job in the first slot failed. |
938 | */ |
939 | job->jc = 0; |
940 | } |
941 | |
942 | if (panfrost_has_hw_feature(pfdev, feat: HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { |
943 | cmd = panfrost_get_job_chain_flag(job) ? |
944 | JS_COMMAND_HARD_STOP_1 : |
945 | JS_COMMAND_HARD_STOP_0; |
946 | } else { |
947 | cmd = JS_COMMAND_HARD_STOP; |
948 | } |
949 | |
950 | job_write(pfdev, JS_COMMAND(i), cmd); |
951 | |
952 | /* Jobs can outlive their file context */ |
953 | job->engine_usage = NULL; |
954 | } |
955 | } |
956 | spin_unlock(lock: &pfdev->js->job_lock); |
957 | } |
958 | |
959 | int panfrost_job_is_idle(struct panfrost_device *pfdev) |
960 | { |
961 | struct panfrost_job_slot *js = pfdev->js; |
962 | int i; |
963 | |
964 | for (i = 0; i < NUM_JOB_SLOTS; i++) { |
965 | /* If there are any jobs in the HW queue, we're not idle */ |
966 | if (atomic_read(v: &js->queue[i].sched.hw_rq_count)) |
967 | return false; |
968 | } |
969 | |
970 | return true; |
971 | } |
972 | |