panfrost_job.c source code [linux/drivers/gpu/drm/panfrost/panfrost_job.c]

// SPDX-License-Identifier: GPL-2.0
/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
/* Copyright 2019 Collabora ltd. */
4	#include <linux/delay.h>
5	#include <linux/interrupt.h>
6	#include <linux/io.h>
7	#include <linux/iopoll.h>
8	#include <linux/platform_device.h>
9	#include <linux/pm_runtime.h>
10	#include <linux/dma-resv.h>
11	#include <drm/gpu_scheduler.h>
12	#include <drm/panfrost_drm.h>
13
14	#include "panfrost_device.h"
15	#include "panfrost_devfreq.h"
16	#include "panfrost_job.h"
17	#include "panfrost_features.h"
18	#include "panfrost_issues.h"
19	#include "panfrost_gem.h"
20	#include "panfrost_regs.h"
21	#include "panfrost_gpu.h"
22	#include "panfrost_mmu.h"
23	#include "panfrost_dump.h"
24
/* Timeout handed to the DRM scheduler for each job slot, in milliseconds. */
#define JOB_TIMEOUT_MS 500

/*
 * MMIO accessors for the job-manager registers. @reg is a byte offset added
 * to the device's iomem base. The @dev argument is parenthesised so that any
 * pointer expression can be passed safely.
 */
#define job_write(dev, reg, data) writel(data, (dev)->iomem + (reg))
#define job_read(dev, reg) readl((dev)->iomem + (reg))
29
30	struct panfrost_queue_state {
31	struct drm_gpu_scheduler sched;
32	u64 fence_context;
33	u64 emit_seqno;
34	};
35
36	struct panfrost_job_slot {
37	struct panfrost_queue_state queue[NUM_JOB_SLOTS];
38	spinlock_t job_lock;
39	int irq;
40	};
41
42	static struct panfrost_job *
43	to_panfrost_job(struct drm_sched_job *sched_job)
44	{
45	return container_of(sched_job, struct panfrost_job, base);
46	}
47
48	struct panfrost_fence {
49	struct dma_fence base;
50	struct drm_device *dev;
51	/ panfrost seqno for signaled() test /
52	u64 seqno;
53	int queue;
54	};
55
56	static inline struct panfrost_fence *
57	to_panfrost_fence(struct dma_fence *fence)
58	{
59	return (struct panfrost_fence *)fence;
60	}
61
/* dma_fence_ops.get_driver_name: constant driver name for all fences. */
static const char *panfrost_fence_get_driver_name(struct dma_fence *fence)
{
	return "panfrost";
}
66
67	static const char panfrost_fence_get_timeline_name(struct* dma_fence *fence)
68	{
69	struct panfrost_fence *f = to_panfrost_fence(fence);
70
71	switch (f->queue) {
72	case `0`:
73	return "panfrost-js-0";
74	case `1`:
75	return "panfrost-js-1";
76	case `2`:
77	return "panfrost-js-2";
78	default:
79	return NULL;
80	}
81	}
82
83	static const struct dma_fence_ops panfrost_fence_ops = {
84	.get_driver_name = panfrost_fence_get_driver_name,
85	.get_timeline_name = panfrost_fence_get_timeline_name,
86	};
87
88	static struct dma_fence panfrost_fence_create(struct* panfrost_device pfdev, int* js_num)
89	{
90	struct panfrost_fence *fence;
91	struct panfrost_job_slot *js = pfdev->js;
92
93	fence = kzalloc(size: sizeof(*fence), GFP_KERNEL);
94	if (!fence)
95	return ERR_PTR(error: -ENOMEM);
96
97	fence->dev = pfdev->ddev;
98	fence->queue = js_num;
99	fence->seqno = ++js->queue[js_num].emit_seqno;
100	dma_fence_init(fence: &fence->base, ops: &panfrost_fence_ops, lock: &js->job_lock,
101	context: js->queue[js_num].fence_context, seqno: fence->seqno);
102
103	return &fence->base;
104	}
105
106	int panfrost_job_get_slot(struct panfrost_job *job)
107	{
108	/ JS0: fragment jobs.*
109	* JS1: vertex/tiler jobs
110	* JS2: compute jobs
111	*/
112	if (job->requirements & PANFROST_JD_REQ_FS)
113	return `0`;
114
115	/ Not exposed to userspace yet /
116	#if 0
117	if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) {
118	if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) &&
119	(job->pfdev->features.nr_core_groups == `2`))
120	return `2`;
121	if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987))
122	return `2`;
123	}
124	#endif
125	return `1`;
126	}
127
128	static void panfrost_job_write_affinity(struct panfrost_device *pfdev,
129	u32 requirements,
130	int js)
131	{
132	u64 affinity;
133
134	/*
135	* Use all cores for now.
136	* Eventually we may need to support tiler only jobs and h/w with
137	* multiple (2) coherent core groups
138	*/
139	affinity = pfdev->features.shader_present;
140
141	job_write(pfdev, JS_AFFINITY_NEXT_LO(js), lower_32_bits(affinity));
142	job_write(pfdev, JS_AFFINITY_NEXT_HI(js), upper_32_bits(affinity));
143	}
144
145	static u32
146	panfrost_get_job_chain_flag(const struct panfrost_job *job)
147	{
148	struct panfrost_fence *f = to_panfrost_fence(fence: job->done_fence);
149
150	if (!panfrost_has_hw_feature(pfdev: job->pfdev, feat: HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
151	return `0`;
152
153	return (f->seqno & `1`) ? JS_CONFIG_JOB_CHAIN_FLAG : `0`;
154	}
155
156	static struct panfrost_job *
157	panfrost_dequeue_job(struct panfrost_device pfdev, int* slot)
158	{
159	struct panfrost_job *job = pfdev->jobs[slot][`0`];
160
161	WARN_ON(!job);
162	if (job->is_profiled) {
163	if (job->engine_usage) {
164	job->engine_usage->elapsed_ns[slot] +=
165	ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
166	job->engine_usage->cycles[slot] +=
167	panfrost_cycle_counter_read(pfdev) - job->start_cycles;
168	}
169	panfrost_cycle_counter_put(pfdev: job->pfdev);
170	}
171
172	pfdev->jobs[slot][`0`] = pfdev->jobs[slot][`1`];
173	pfdev->jobs[slot][`1`] = NULL;
174
175	return job;
176	}
177
178	static unsigned int
179	panfrost_enqueue_job(struct panfrost_device pfdev, int* slot,
180	struct panfrost_job *job)
181	{
182	if (WARN_ON(!job))
183	return `0`;
184
185	if (!pfdev->jobs[slot][`0`]) {
186	pfdev->jobs[slot][`0`] = job;
187	return `0`;
188	}
189
190	WARN_ON(pfdev->jobs[slot][`1`]);
191	pfdev->jobs[slot][`1`] = job;
192	WARN_ON(panfrost_get_job_chain_flag(job) ==
193	panfrost_get_job_chain_flag(pfdev->jobs[slot][`0`]));
194	return `1`;
195	}
196
197	static void panfrost_job_hw_submit(struct panfrost_job job, int* js)
198	{
199	struct panfrost_device *pfdev = job->pfdev;
200	unsigned int subslot;
201	u32 cfg;
202	u64 jc_head = job->jc;
203	int ret;
204
205	panfrost_devfreq_record_busy(devfreq: &pfdev->pfdevfreq);
206
207	ret = pm_runtime_get_sync(dev: pfdev->dev);
208	if (ret < `0`)
209	return;
210
211	if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js)))) {
212	return;
213	}
214
215	cfg = panfrost_mmu_as_get(pfdev, mmu: job->mmu);
216
217	job_write(pfdev, JS_HEAD_NEXT_LO(js), lower_32_bits(jc_head));
218	job_write(pfdev, JS_HEAD_NEXT_HI(js), upper_32_bits(jc_head));
219
220	panfrost_job_write_affinity(pfdev, requirements: job->requirements, js);
221
222	/ start MMU, medium priority, cache clean/flush on end, clean/flush on*
223	* start */
224	cfg \|= JS_CONFIG_THREAD_PRI(`8`) \|
225	JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE \|
226	JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE \|
227	panfrost_get_job_chain_flag(job);
228
229	if (panfrost_has_hw_feature(pfdev, feat: HW_FEATURE_FLUSH_REDUCTION))
230	cfg \|= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
231
232	if (panfrost_has_hw_issue(pfdev, issue: HW_ISSUE_10649))
233	cfg \|= JS_CONFIG_START_MMU;
234
235	job_write(pfdev, JS_CONFIG_NEXT(js), cfg);
236
237	if (panfrost_has_hw_feature(pfdev, feat: HW_FEATURE_FLUSH_REDUCTION))
238	job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id);
239
240	/ GO ! /
241
242	spin_lock(lock: &pfdev->js->job_lock);
243	subslot = panfrost_enqueue_job(pfdev, slot: js, job);
244	/ Don't queue the job if a reset is in progress /
245	if (!atomic_read(v: &pfdev->reset.pending)) {
246	if (atomic_read(v: &pfdev->profile_mode)) {
247	panfrost_cycle_counter_get(pfdev);
248	job->is_profiled = true;
249	job->start_time = ktime_get();
250	job->start_cycles = panfrost_cycle_counter_read(pfdev);
251	}
252
253	job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
254	dev_dbg(pfdev->dev,
255	"JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d",
256	job, js, subslot, jc_head, cfg & `0xf`);
257	}
258	spin_unlock(lock: &pfdev->js->job_lock);
259	}
260
261	static int panfrost_acquire_object_fences(struct drm_gem_object **bos,
262	int bo_count,
263	struct drm_sched_job *job)
264	{
265	int i, ret;
266
267	for (i = `0`; i < bo_count; i++) {
268	ret = dma_resv_reserve_fences(obj: bos[i]->resv, num_fences: `1`);
269	if (ret)
270	return ret;
271
272	/ panfrost always uses write mode in its current uapi /
273	ret = drm_sched_job_add_implicit_dependencies(job, obj: bos[i],
274	write: true);
275	if (ret)
276	return ret;
277	}
278
279	return `0`;
280	}
281
282	static void panfrost_attach_object_fences(struct drm_gem_object **bos,
283	int bo_count,
284	struct dma_fence *fence)
285	{
286	int i;
287
288	for (i = `0`; i < bo_count; i++)
289	dma_resv_add_fence(obj: bos[i]->resv, fence, usage: DMA_RESV_USAGE_WRITE);
290	}
291
292	int panfrost_job_push(struct panfrost_job *job)
293	{
294	struct panfrost_device *pfdev = job->pfdev;
295	struct ww_acquire_ctx acquire_ctx;
296	int ret = `0`;
297
298	ret = drm_gem_lock_reservations(objs: job->bos, count: job->bo_count,
299	acquire_ctx: &acquire_ctx);
300	if (ret)
301	return ret;
302
303	mutex_lock(&pfdev->sched_lock);
304	drm_sched_job_arm(job: &job->base);
305
306	job->render_done_fence = dma_fence_get(fence: &job->base.s_fence->finished);
307
308	ret = panfrost_acquire_object_fences(bos: job->bos, bo_count: job->bo_count,
309	job: &job->base);
310	if (ret) {
311	mutex_unlock(lock: &pfdev->sched_lock);
312	goto unlock;
313	}
314
315	kref_get(kref: &job->refcount); / put by scheduler job completion /
316
317	drm_sched_entity_push_job(sched_job: &job->base);
318
319	mutex_unlock(lock: &pfdev->sched_lock);
320
321	panfrost_attach_object_fences(bos: job->bos, bo_count: job->bo_count,
322	fence: job->render_done_fence);
323
324	unlock:
325	drm_gem_unlock_reservations(objs: job->bos, count: job->bo_count, acquire_ctx: &acquire_ctx);
326
327	return ret;
328	}
329
330	static void panfrost_job_cleanup(struct kref *ref)
331	{
332	struct panfrost_job job = container_of(ref, struct* panfrost_job,
333	refcount);
334	unsigned int i;
335
336	dma_fence_put(fence: job->done_fence);
337	dma_fence_put(fence: job->render_done_fence);
338
339	if (job->mappings) {
340	for (i = `0`; i < job->bo_count; i++) {
341	if (!job->mappings[i])
342	break;
343
344	atomic_dec(v: &job->mappings[i]->obj->gpu_usecount);
345	panfrost_gem_mapping_put(mapping: job->mappings[i]);
346	}
347	kvfree(addr: job->mappings);
348	}
349
350	if (job->bos) {
351	for (i = `0`; i < job->bo_count; i++)
352	drm_gem_object_put(obj: job->bos[i]);
353
354	kvfree(addr: job->bos);
355	}
356
357	kfree(objp: job);
358	}
359
360	void panfrost_job_put(struct panfrost_job *job)
361	{
362	kref_put(kref: &job->refcount, release: panfrost_job_cleanup);
363	}
364
/*
 * drm_sched_backend_ops.free_job: clean up the scheduler part of the job,
 * then drop one job reference.
 */
static void panfrost_job_free(struct drm_sched_job *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);

	drm_sched_job_cleanup(sched_job);

	panfrost_job_put(job);
}
373
374	static struct dma_fence panfrost_job_run(struct* drm_sched_job *sched_job)
375	{
376	struct panfrost_job *job = to_panfrost_job(sched_job);
377	struct panfrost_device *pfdev = job->pfdev;
378	int slot = panfrost_job_get_slot(job);
379	struct dma_fence *fence = NULL;
380
381	if (unlikely(job->base.s_fence->finished.error))
382	return NULL;
383
384	/ Nothing to execute: can happen if the job has finished while*
385	* we were resetting the GPU.
386	*/
387	if (!job->jc)
388	return NULL;
389
390	fence = panfrost_fence_create(pfdev, js_num: slot);
391	if (IS_ERR(ptr: fence))
392	return fence;
393
394	if (job->done_fence)
395	dma_fence_put(fence: job->done_fence);
396	job->done_fence = dma_fence_get(fence);
397
398	panfrost_job_hw_submit(job, js: slot);
399
400	return fence;
401	}
402
403	void panfrost_job_enable_interrupts(struct panfrost_device *pfdev)
404	{
405	int j;
406	u32 irq_mask = `0`;
407
408	for (j = `0`; j < NUM_JOB_SLOTS; j++) {
409	irq_mask \|= MK_JS_MASK(j);
410	}
411
412	job_write(pfdev, JOB_INT_CLEAR, irq_mask);
413	job_write(pfdev, JOB_INT_MASK, irq_mask);
414	}
415
416	static void panfrost_job_handle_err(struct panfrost_device *pfdev,
417	struct panfrost_job *job,
418	unsigned int js)
419	{
420	u32 js_status = job_read(pfdev, JS_STATUS(js));
421	const char *exception_name = panfrost_exception_name(exception_code: js_status);
422	bool signal_fence = true;
423
424	if (!panfrost_exception_is_fault(exception_code: js_status)) {
425	dev_dbg(pfdev->dev, "js event, js=%d, status=%s, head=0x%x, tail=0x%x",
426	js, exception_name,
427	job_read(pfdev, JS_HEAD_LO(js)),
428	job_read(pfdev, JS_TAIL_LO(js)));
429	} else {
430	dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x",
431	js, exception_name,
432	job_read(pfdev, JS_HEAD_LO(js)),
433	job_read(pfdev, JS_TAIL_LO(js)));
434	}
435
436	if (js_status == DRM_PANFROST_EXCEPTION_STOPPED) {
437	/ Update the job head so we can resume /
438	job->jc = job_read(pfdev, JS_TAIL_LO(js)) \|
439	((u64)job_read(pfdev, JS_TAIL_HI(js)) << `32`);
440
441	/ The job will be resumed, don't signal the fence /
442	signal_fence = false;
443	} else if (js_status == DRM_PANFROST_EXCEPTION_TERMINATED) {
444	/ Job has been hard-stopped, flag it as canceled /
445	dma_fence_set_error(fence: job->done_fence, error: -ECANCELED);
446	job->jc = `0`;
447	} else if (panfrost_exception_is_fault(exception_code: js_status)) {
448	/ We might want to provide finer-grained error code based on*
449	* the exception type, but unconditionally setting to EINVAL
450	* is good enough for now.
451	*/
452	dma_fence_set_error(fence: job->done_fence, error: -EINVAL);
453	job->jc = `0`;
454	}
455
456	panfrost_mmu_as_put(pfdev, mmu: job->mmu);
457	panfrost_devfreq_record_idle(devfreq: &pfdev->pfdevfreq);
458
459	if (signal_fence)
460	dma_fence_signal_locked(fence: job->done_fence);
461
462	pm_runtime_put_autosuspend(dev: pfdev->dev);
463
464	if (panfrost_exception_needs_reset(pfdev, exception_code: js_status)) {
465	atomic_set(v: &pfdev->reset.pending, i: `1`);
466	drm_sched_fault(sched: &pfdev->js->queue[js].sched);
467	}
468	}
469
470	static void panfrost_job_handle_done(struct panfrost_device *pfdev,
471	struct panfrost_job *job)
472	{
473	/ Set ->jc to 0 to avoid re-submitting an already finished job (can*
474	* happen when we receive the DONE interrupt while doing a GPU reset).
475	*/
476	job->jc = `0`;
477	panfrost_mmu_as_put(pfdev, mmu: job->mmu);
478	panfrost_devfreq_record_idle(devfreq: &pfdev->pfdevfreq);
479
480	dma_fence_signal_locked(fence: job->done_fence);
481	pm_runtime_put_autosuspend(dev: pfdev->dev);
482	}
483
484	static void panfrost_job_handle_irq(struct panfrost_device *pfdev, u32 status)
485	{
486	struct panfrost_job *done[NUM_JOB_SLOTS][`2`] = {};
487	struct panfrost_job *failed[NUM_JOB_SLOTS] = {};
488	u32 js_state = `0`, js_events = `0`;
489	unsigned int i, j;
490
491	/ First we collect all failed/done jobs. /
492	while (status) {
493	u32 js_state_mask = `0`;
494
495	for (j = `0`; j < NUM_JOB_SLOTS; j++) {
496	if (status & MK_JS_MASK(j))
497	js_state_mask \|= MK_JS_MASK(j);
498
499	if (status & JOB_INT_MASK_DONE(j)) {
500	if (done[j][`0`])
501	done[j][`1`] = panfrost_dequeue_job(pfdev, slot: j);
502	else
503	done[j][`0`] = panfrost_dequeue_job(pfdev, slot: j);
504	}
505
506	if (status & JOB_INT_MASK_ERR(j)) {
507	/ Cancel the next submission. Will be submitted*
508	* after we're done handling this failure if
509	* there's no reset pending.
510	*/
511	job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP);
512	failed[j] = panfrost_dequeue_job(pfdev, slot: j);
513	}
514	}
515
516	/ JS_STATE is sampled when JOB_INT_CLEAR is written.*
517	* For each BIT(slot) or BIT(slot + 16) bit written to
518	* JOB_INT_CLEAR, the corresponding bits in JS_STATE
519	* (BIT(slot) and BIT(slot + 16)) are updated, but this
520	* is racy. If we only have one job done at the time we
521	* read JOB_INT_RAWSTAT but the second job fails before we
522	* clear the status, we end up with a status containing
523	* only the DONE bit and consider both jobs as DONE since
524	* JS_STATE reports both NEXT and CURRENT as inactive.
525	* To prevent that, let's repeat this clear+read steps
526	* until status is 0.
527	*/
528	job_write(pfdev, JOB_INT_CLEAR, status);
529	js_state &= ~js_state_mask;
530	js_state \|= job_read(pfdev, JOB_INT_JS_STATE) & js_state_mask;
531	js_events \|= status;
532	status = job_read(pfdev, JOB_INT_RAWSTAT);
533	}
534
535	/ Then we handle the dequeued jobs. /
536	for (j = `0`; j < NUM_JOB_SLOTS; j++) {
537	if (!(js_events & MK_JS_MASK(j)))
538	continue;
539
540	if (failed[j]) {
541	panfrost_job_handle_err(pfdev, job: failed[j], js: j);
542	} else if (pfdev->jobs[j][`0`] && !(js_state & MK_JS_MASK(j))) {
543	/ When the current job doesn't fail, the JM dequeues*
544	* the next job without waiting for an ACK, this means
545	* we can have 2 jobs dequeued and only catch the
546	* interrupt when the second one is done. If both slots
547	* are inactive, but one job remains in pfdev->jobs[j],
548	* consider it done. Of course that doesn't apply if a
549	* failure happened since we cancelled execution of the
550	* job in _NEXT (see above).
551	*/
552	if (WARN_ON(!done[j][`0`]))
553	done[j][`0`] = panfrost_dequeue_job(pfdev, slot: j);
554	else
555	done[j][`1`] = panfrost_dequeue_job(pfdev, slot: j);
556	}
557
558	for (i = `0`; i < ARRAY_SIZE(done[`0`]) && done[j][i]; i++)
559	panfrost_job_handle_done(pfdev, job: done[j][i]);
560	}
561
562	/ And finally we requeue jobs that were waiting in the second slot*
563	* and have been stopped if we detected a failure on the first slot.
564	*/
565	for (j = `0`; j < NUM_JOB_SLOTS; j++) {
566	if (!(js_events & MK_JS_MASK(j)))
567	continue;
568
569	if (!failed[j] \|\| !pfdev->jobs[j][`0`])
570	continue;
571
572	if (pfdev->jobs[j][`0`]->jc == `0`) {
573	/ The job was cancelled, signal the fence now /
574	struct panfrost_job *canceled = panfrost_dequeue_job(pfdev, slot: j);
575
576	dma_fence_set_error(fence: canceled->done_fence, error: -ECANCELED);
577	panfrost_job_handle_done(pfdev, job: canceled);
578	} else if (!atomic_read(v: &pfdev->reset.pending)) {
579	/ Requeue the job we removed if no reset is pending /
580	job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_START);
581	}
582	}
583	}
584
585	static void panfrost_job_handle_irqs(struct panfrost_device *pfdev)
586	{
587	u32 status = job_read(pfdev, JOB_INT_RAWSTAT);
588
589	while (status) {
590	pm_runtime_mark_last_busy(dev: pfdev->dev);
591
592	spin_lock(lock: &pfdev->js->job_lock);
593	panfrost_job_handle_irq(pfdev, status);
594	spin_unlock(lock: &pfdev->js->job_lock);
595	status = job_read(pfdev, JOB_INT_RAWSTAT);
596	}
597	}
598
599	static u32 panfrost_active_slots(struct panfrost_device *pfdev,
600	u32 *js_state_mask, u32 js_state)
601	{
602	u32 rawstat;
603
604	if (!(js_state & *js_state_mask))
605	return `0`;
606
607	rawstat = job_read(pfdev, JOB_INT_RAWSTAT);
608	if (rawstat) {
609	unsigned int i;
610
611	for (i = `0`; i < NUM_JOB_SLOTS; i++) {
612	if (rawstat & MK_JS_MASK(i))
613	*js_state_mask &= ~MK_JS_MASK(i);
614	}
615	}
616
617	return js_state & *js_state_mask;
618	}
619
620	static void
621	panfrost_reset(struct panfrost_device *pfdev,
622	struct drm_sched_job *bad)
623	{
624	u32 js_state, js_state_mask = `0xffffffff`;
625	unsigned int i, j;
626	bool cookie;
627	int ret;
628
629	if (!atomic_read(v: &pfdev->reset.pending))
630	return;
631
632	/ Stop the schedulers.*
633	*
634	* FIXME: We temporarily get out of the dma_fence_signalling section
635	* because the cleanup path generate lockdep splats when taking locks
636	* to release job resources. We should rework the code to follow this
637	* pattern:
638	*
639	* try_lock
640	* if (locked)
641	* release
642	* else
643	* schedule_work_to_release_later
644	*/
645	for (i = `0`; i < NUM_JOB_SLOTS; i++)
646	drm_sched_stop(sched: &pfdev->js->queue[i].sched, bad);
647
648	cookie = dma_fence_begin_signalling();
649
650	if (bad)
651	drm_sched_increase_karma(bad);
652
653	/ Mask job interrupts and synchronize to make sure we won't be*
654	* interrupted during our reset.
655	*/
656	job_write(pfdev, JOB_INT_MASK, `0`);
657	synchronize_irq(irq: pfdev->js->irq);
658
659	for (i = `0`; i < NUM_JOB_SLOTS; i++) {
660	/ Cancel the next job and soft-stop the running job. /
661	job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
662	job_write(pfdev, JS_COMMAND(i), JS_COMMAND_SOFT_STOP);
663	}
664
665	/ Wait at most 10ms for soft-stops to complete /
666	ret = readl_poll_timeout(pfdev->iomem + JOB_INT_JS_STATE, js_state,
667	!panfrost_active_slots(pfdev, &js_state_mask, js_state),
668	`10`, `10000`);
669
670	if (ret)
671	dev_err(pfdev->dev, "Soft-stop failed\n");
672
673	/ Handle the remaining interrupts before we reset. /
674	panfrost_job_handle_irqs(pfdev);
675
676	/ Remaining interrupts have been handled, but we might still have*
677	* stuck jobs. Let's make sure the PM counters stay balanced by
678	* manually calling pm_runtime_put_noidle() and
679	* panfrost_devfreq_record_idle() for each stuck job.
680	* Let's also make sure the cycle counting register's refcnt is
681	* kept balanced to prevent it from running forever
682	*/
683	spin_lock(lock: &pfdev->js->job_lock);
684	for (i = `0`; i < NUM_JOB_SLOTS; i++) {
685	for (j = `0`; j < ARRAY_SIZE(pfdev->jobs[`0`]) && pfdev->jobs[i][j]; j++) {
686	if (pfdev->jobs[i][j]->is_profiled)
687	panfrost_cycle_counter_put(pfdev: pfdev->jobs[i][j]->pfdev);
688	pm_runtime_put_noidle(dev: pfdev->dev);
689	panfrost_devfreq_record_idle(devfreq: &pfdev->pfdevfreq);
690	}
691	}
692	memset(pfdev->jobs, `0`, sizeof(pfdev->jobs));
693	spin_unlock(lock: &pfdev->js->job_lock);
694
695	/ Proceed with reset now. /
696	panfrost_device_reset(pfdev);
697
698	/ panfrost_device_reset() unmasks job interrupts, but we want to*
699	* keep them masked a bit longer.
700	*/
701	job_write(pfdev, JOB_INT_MASK, `0`);
702
703	/ GPU has been reset, we can clear the reset pending bit. /
704	atomic_set(v: &pfdev->reset.pending, i: `0`);
705
706	/ Now resubmit jobs that were previously queued but didn't have a*
707	* chance to finish.
708	* FIXME: We temporarily get out of the DMA fence signalling section
709	* while resubmitting jobs because the job submission logic will
710	* allocate memory with the GFP_KERNEL flag which can trigger memory
711	* reclaim and exposes a lock ordering issue.
712	*/
713	dma_fence_end_signalling(cookie);
714	for (i = `0`; i < NUM_JOB_SLOTS; i++)
715	drm_sched_resubmit_jobs(sched: &pfdev->js->queue[i].sched);
716	cookie = dma_fence_begin_signalling();
717
718	/ Restart the schedulers /
719	for (i = `0`; i < NUM_JOB_SLOTS; i++)
720	drm_sched_start(sched: &pfdev->js->queue[i].sched, full_recovery: true);
721
722	/ Re-enable job interrupts now that everything has been restarted. /
723	job_write(pfdev, JOB_INT_MASK,
724	GENMASK(`16` + NUM_JOB_SLOTS - `1`, `16`) \|
725	GENMASK(NUM_JOB_SLOTS - `1`, `0`));
726
727	dma_fence_end_signalling(cookie);
728	}
729
730	static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job
731	*sched_job)
732	{
733	struct panfrost_job *job = to_panfrost_job(sched_job);
734	struct panfrost_device *pfdev = job->pfdev;
735	int js = panfrost_job_get_slot(job);
736
737	/*
738	* If the GPU managed to complete this jobs fence, the timeout is
739	* spurious. Bail out.
740	*/
741	if (dma_fence_is_signaled(fence: job->done_fence))
742	return DRM_GPU_SCHED_STAT_NOMINAL;
743
744	/*
745	* Panfrost IRQ handler may take a long time to process an interrupt
746	* if there is another IRQ handler hogging the processing.
747	* For example, the HDMI encoder driver might be stuck in the IRQ
748	* handler for a significant time in a case of bad cable connection.
749	* In order to catch such cases and not report spurious Panfrost
750	* job timeouts, synchronize the IRQ handler and re-check the fence
751	* status.
752	*/
753	synchronize_irq(irq: pfdev->js->irq);
754
755	if (dma_fence_is_signaled(fence: job->done_fence)) {
756	dev_warn(pfdev->dev, "unexpectedly high interrupt latency\n");
757	return DRM_GPU_SCHED_STAT_NOMINAL;
758	}
759
760	dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
761	js,
762	job_read(pfdev, JS_CONFIG(js)),
763	job_read(pfdev, JS_STATUS(js)),
764	job_read(pfdev, JS_HEAD_LO(js)),
765	job_read(pfdev, JS_TAIL_LO(js)),
766	sched_job);
767
768	panfrost_core_dump(job);
769
770	atomic_set(v: &pfdev->reset.pending, i: `1`);
771	panfrost_reset(pfdev, bad: sched_job);
772
773	return DRM_GPU_SCHED_STAT_NOMINAL;
774	}
775
776	static void panfrost_reset_work(struct work_struct *work)
777	{
778	struct panfrost_device *pfdev;
779
780	pfdev = container_of(work, struct panfrost_device, reset.work);
781	panfrost_reset(pfdev, NULL);
782	}
783
784	static const struct drm_sched_backend_ops panfrost_sched_ops = {
785	.run_job = panfrost_job_run,
786	.timedout_job = panfrost_job_timedout,
787	.free_job = panfrost_job_free
788	};
789
790	static irqreturn_t panfrost_job_irq_handler_thread(int irq, void *data)
791	{
792	struct panfrost_device *pfdev = data;
793
794	panfrost_job_handle_irqs(pfdev);
795	job_write(pfdev, JOB_INT_MASK,
796	GENMASK(`16` + NUM_JOB_SLOTS - `1`, `16`) \|
797	GENMASK(NUM_JOB_SLOTS - `1`, `0`));
798	return IRQ_HANDLED;
799	}
800
801	static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
802	{
803	struct panfrost_device *pfdev = data;
804	u32 status = job_read(pfdev, JOB_INT_STAT);
805
806	if (!status)
807	return IRQ_NONE;
808
809	job_write(pfdev, JOB_INT_MASK, `0`);
810	return IRQ_WAKE_THREAD;
811	}
812
813	int panfrost_job_init(struct panfrost_device *pfdev)
814	{
815	struct panfrost_job_slot *js;
816	unsigned int nentries = `2`;
817	int ret, j;
818
819	/ All GPUs have two entries per queue, but without jobchain*
820	* disambiguation stopping the right job in the close path is tricky,
821	* so let's just advertise one entry in that case.
822	*/
823	if (!panfrost_has_hw_feature(pfdev, feat: HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
824	nentries = `1`;
825
826	pfdev->js = js = devm_kzalloc(dev: pfdev->dev, size: sizeof(*js), GFP_KERNEL);
827	if (!js)
828	return -ENOMEM;
829
830	INIT_WORK(&pfdev->reset.work, panfrost_reset_work);
831	spin_lock_init(&js->job_lock);
832
833	js->irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job");
834	if (js->irq < `0`)
835	return js->irq;
836
837	ret = devm_request_threaded_irq(dev: pfdev->dev, irq: js->irq,
838	handler: panfrost_job_irq_handler,
839	thread_fn: panfrost_job_irq_handler_thread,
840	IRQF_SHARED, KBUILD_MODNAME "-job",
841	dev_id: pfdev);
842	if (ret) {
843	dev_err(pfdev->dev, "failed to request job irq");
844	return ret;
845	}
846
847	pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset", `0`);
848	if (!pfdev->reset.wq)
849	return -ENOMEM;
850
851	for (j = `0`; j < NUM_JOB_SLOTS; j++) {
852	js->queue[j].fence_context = dma_fence_context_alloc(num: `1`);
853
854	ret = drm_sched_init(sched: &js->queue[j].sched,
855	ops: &panfrost_sched_ops,
856	num_rqs: DRM_SCHED_PRIORITY_COUNT,
857	hw_submission: nentries, hang_limit: `0`,
858	timeout: msecs_to_jiffies(JOB_TIMEOUT_MS),
859	timeout_wq: pfdev->reset.wq,
860	NULL, name: "pan_js", dev: pfdev->dev);
861	if (ret) {
862	dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret);
863	goto err_sched;
864	}
865	}
866
867	panfrost_job_enable_interrupts(pfdev);
868
869	return `0`;
870
871	err_sched:
872	for (j--; j >= `0`; j--)
873	drm_sched_fini(sched: &js->queue[j].sched);
874
875	destroy_workqueue(wq: pfdev->reset.wq);
876	return ret;
877	}
878
879	void panfrost_job_fini(struct panfrost_device *pfdev)
880	{
881	struct panfrost_job_slot *js = pfdev->js;
882	int j;
883
884	job_write(pfdev, JOB_INT_MASK, `0`);
885
886	for (j = `0`; j < NUM_JOB_SLOTS; j++) {
887	drm_sched_fini(sched: &js->queue[j].sched);
888	}
889
890	cancel_work_sync(work: &pfdev->reset.work);
891	destroy_workqueue(wq: pfdev->reset.wq);
892	}
893
894	int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
895	{
896	struct panfrost_device *pfdev = panfrost_priv->pfdev;
897	struct panfrost_job_slot *js = pfdev->js;
898	struct drm_gpu_scheduler *sched;
899	int ret, i;
900
901	for (i = `0`; i < NUM_JOB_SLOTS; i++) {
902	sched = &js->queue[i].sched;
903	ret = drm_sched_entity_init(entity: &panfrost_priv->sched_entity[i],
904	priority: DRM_SCHED_PRIORITY_NORMAL, sched_list: &sched,
905	num_sched_list: `1`, NULL);
906	if (WARN_ON(ret))
907	return ret;
908	}
909	return `0`;
910	}
911
912	void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
913	{
914	struct panfrost_device *pfdev = panfrost_priv->pfdev;
915	int i;
916
917	for (i = `0`; i < NUM_JOB_SLOTS; i++)
918	drm_sched_entity_destroy(entity: &panfrost_priv->sched_entity[i]);
919
920	/ Kill in-flight jobs /
921	spin_lock(lock: &pfdev->js->job_lock);
922	for (i = `0`; i < NUM_JOB_SLOTS; i++) {
923	struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i];
924	int j;
925
926	for (j = ARRAY_SIZE(pfdev->jobs[`0`]) - `1`; j >= `0`; j--) {
927	struct panfrost_job *job = pfdev->jobs[i][j];
928	u32 cmd;
929
930	if (!job \|\| job->base.entity != entity)
931	continue;
932
933	if (j == `1`) {
934	/ Try to cancel the job before it starts /
935	job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
936	/ Reset the job head so it doesn't get restarted if*
937	* the job in the first slot failed.
938	*/
939	job->jc = `0`;
940	}
941
942	if (panfrost_has_hw_feature(pfdev, feat: HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
943	cmd = panfrost_get_job_chain_flag(job) ?
944	JS_COMMAND_HARD_STOP_1 :
945	JS_COMMAND_HARD_STOP_0;
946	} else {
947	cmd = JS_COMMAND_HARD_STOP;
948	}
949
950	job_write(pfdev, JS_COMMAND(i), cmd);
951
952	/ Jobs can outlive their file context /
953	job->engine_usage = NULL;
954	}
955	}
956	spin_unlock(lock: &pfdev->js->job_lock);
957	}
958
959	int panfrost_job_is_idle(struct panfrost_device *pfdev)
960	{
961	struct panfrost_job_slot *js = pfdev->js;
962	int i;
963
964	for (i = `0`; i < NUM_JOB_SLOTS; i++) {
965	/ If there are any jobs in the HW queue, we're not idle /
966	if (atomic_read(v: &js->queue[i].sched.hw_rq_count))
967	return false;
968	}
969
970	return true;
971	}
972

source code of linux/drivers/gpu/drm/panfrost/panfrost_job.c