nouveau_sched.c source code [linux/drivers/gpu/drm/nouveau/nouveau_sched.c]

1	// SPDX-License-Identifier: MIT
2
3	#include <linux/slab.h>
4	#include <drm/gpu_scheduler.h>
5	#include <drm/drm_syncobj.h>
6
7	#include "nouveau_drv.h"
8	#include "nouveau_gem.h"
9	#include "nouveau_mem.h"
10	#include "nouveau_dma.h"
11	#include "nouveau_exec.h"
12	#include "nouveau_abi16.h"
13	#include "nouveau_sched.h"
14
/* FIXME
 *
 * We want to make sure that jobs currently executing can't be deferred by
 * other jobs competing for the hardware. Otherwise we might end up with job
 * timeouts just because of too many clients submitting too many jobs. We don't
 * want jobs to time out because of system load, but because of the job being
 * too bulky.
 *
 * For now allow for up to 16 concurrent jobs in flight until we know how many
 * rings the hardware can process in parallel.
 */
/* Number of hardware submissions handed to drm_sched_init(). */
#define NOUVEAU_SCHED_HW_SUBMISSIONS	16
/* Per-job timeout in milliseconds (10 s); converted to jiffies at init. */
#define NOUVEAU_SCHED_JOB_TIMEOUT_MS	(10 * 1000)
28
29	int
30	nouveau_job_init(struct nouveau_job *job,
31	struct nouveau_job_args *args)
32	{
33	struct nouveau_sched_entity *entity = args->sched_entity;
34	int ret;
35
36	job->file_priv = args->file_priv;
37	job->cli = nouveau_cli(fpriv: args->file_priv);
38	job->entity = entity;
39
40	job->sync = args->sync;
41	job->resv_usage = args->resv_usage;
42
43	job->ops = args->ops;
44
45	job->in_sync.count = args->in_sync.count;
46	if (job->in_sync.count) {
47	if (job->sync)
48	return -EINVAL;
49
50	job->in_sync.data = kmemdup(p: args->in_sync.s,
51	size: sizeof(args->in_sync.s)
52	args->in_sync.count,
53	GFP_KERNEL);
54	if (!job->in_sync.data)
55	return -ENOMEM;
56	}
57
58	job->out_sync.count = args->out_sync.count;
59	if (job->out_sync.count) {
60	if (job->sync) {
61	ret = -EINVAL;
62	goto err_free_in_sync;
63	}
64
65	job->out_sync.data = kmemdup(p: args->out_sync.s,
66	size: sizeof(args->out_sync.s)
67	args->out_sync.count,
68	GFP_KERNEL);
69	if (!job->out_sync.data) {
70	ret = -ENOMEM;
71	goto err_free_in_sync;
72	}
73
74	job->out_sync.objs = kcalloc(n: job->out_sync.count,
75	size: sizeof(*job->out_sync.objs),
76	GFP_KERNEL);
77	if (!job->out_sync.objs) {
78	ret = -ENOMEM;
79	goto err_free_out_sync;
80	}
81
82	job->out_sync.chains = kcalloc(n: job->out_sync.count,
83	size: sizeof(*job->out_sync.chains),
84	GFP_KERNEL);
85	if (!job->out_sync.chains) {
86	ret = -ENOMEM;
87	goto err_free_objs;
88	}
89
90	}
91
92	ret = drm_sched_job_init(job: &job->base, entity: &entity->base, NULL);
93	if (ret)
94	goto err_free_chains;
95
96	job->state = NOUVEAU_JOB_INITIALIZED;
97
98	return `0`;
99
100	err_free_chains:
101	kfree(objp: job->out_sync.chains);
102	err_free_objs:
103	kfree(objp: job->out_sync.objs);
104	err_free_out_sync:
105	kfree(objp: job->out_sync.data);
106	err_free_in_sync:
107	kfree(objp: job->in_sync.data);
108	return ret;
109	}
110
111	void
112	nouveau_job_free(struct nouveau_job *job)
113	{
114	kfree(objp: job->in_sync.data);
115	kfree(objp: job->out_sync.data);
116	kfree(objp: job->out_sync.objs);
117	kfree(objp: job->out_sync.chains);
118	}
119
120	void nouveau_job_fini(struct nouveau_job *job)
121	{
122	dma_fence_put(fence: job->done_fence);
123	drm_sched_job_cleanup(job: &job->base);
124	job->ops->free(job);
125	}
126
127	static int
128	sync_find_fence(struct nouveau_job *job,
129	struct drm_nouveau_sync *sync,
130	struct dma_fence **fence)
131	{
132	u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;
133	u64 point = `0`;
134	int ret;
135
136	if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
137	stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ)
138	return -EOPNOTSUPP;
139
140	if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ)
141	point = sync->timeline_value;
142
143	ret = drm_syncobj_find_fence(file_private: job->file_priv,
144	handle: sync->handle, point,
145	flags: `0` / flags /, fence);
146	if (ret)
147	return ret;
148
149	return `0`;
150	}
151
152	static int
153	nouveau_job_add_deps(struct nouveau_job *job)
154	{
155	struct dma_fence *in_fence = NULL;
156	int ret, i;
157
158	for (i = `0`; i < job->in_sync.count; i++) {
159	struct drm_nouveau_sync *sync = &job->in_sync.data[i];
160
161	ret = sync_find_fence(job, sync, fence: &in_fence);
162	if (ret) {
163	NV_PRINTK(warn, job->cli,
164	"Failed to find syncobj (-> in): handle=%d\n",
165	sync->handle);
166	return ret;
167	}
168
169	ret = drm_sched_job_add_dependency(job: &job->base, fence: in_fence);
170	if (ret)
171	return ret;
172	}
173
174	return `0`;
175	}
176
177	static void
178	nouveau_job_fence_attach_cleanup(struct nouveau_job *job)
179	{
180	int i;
181
182	for (i = `0`; i < job->out_sync.count; i++) {
183	struct drm_syncobj *obj = job->out_sync.objs[i];
184	struct dma_fence_chain *chain = job->out_sync.chains[i];
185
186	if (obj)
187	drm_syncobj_put(obj);
188
189	if (chain)
190	dma_fence_chain_free(chain);
191	}
192	}
193
194	static int
195	nouveau_job_fence_attach_prepare(struct nouveau_job *job)
196	{
197	int i, ret;
198
199	for (i = `0`; i < job->out_sync.count; i++) {
200	struct drm_nouveau_sync *sync = &job->out_sync.data[i];
201	struct drm_syncobj **pobj = &job->out_sync.objs[i];
202	struct dma_fence_chain **pchain = &job->out_sync.chains[i];
203	u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;
204
205	if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
206	stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
207	ret = -EINVAL;
208	goto err_sync_cleanup;
209	}
210
211	*pobj = drm_syncobj_find(file_private: job->file_priv, handle: sync->handle);
212	if (!*pobj) {
213	NV_PRINTK(warn, job->cli,
214	"Failed to find syncobj (-> out): handle=%d\n",
215	sync->handle);
216	ret = -ENOENT;
217	goto err_sync_cleanup;
218	}
219
220	if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
221	*pchain = dma_fence_chain_alloc();
222	if (!*pchain) {
223	ret = -ENOMEM;
224	goto err_sync_cleanup;
225	}
226	}
227	}
228
229	return `0`;
230
231	err_sync_cleanup:
232	nouveau_job_fence_attach_cleanup(job);
233	return ret;
234	}
235
236	static void
237	nouveau_job_fence_attach(struct nouveau_job *job)
238	{
239	struct dma_fence *fence = job->done_fence;
240	int i;
241
242	for (i = `0`; i < job->out_sync.count; i++) {
243	struct drm_nouveau_sync *sync = &job->out_sync.data[i];
244	struct drm_syncobj **pobj = &job->out_sync.objs[i];
245	struct dma_fence_chain **pchain = &job->out_sync.chains[i];
246	u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;
247
248	if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
249	drm_syncobj_add_point(syncobj: pobj, chain: pchain, fence,
250	point: sync->timeline_value);
251	} else {
252	drm_syncobj_replace_fence(syncobj: *pobj, fence);
253	}
254
255	drm_syncobj_put(obj: *pobj);
256	*pobj = NULL;
257	*pchain = NULL;
258	}
259	}
260
261	int
262	nouveau_job_submit(struct nouveau_job *job)
263	{
264	struct nouveau_sched_entity *entity = to_nouveau_sched_entity(job->base.entity);
265	struct dma_fence *done_fence = NULL;
266	int ret;
267
268	ret = nouveau_job_add_deps(job);
269	if (ret)
270	goto err;
271
272	ret = nouveau_job_fence_attach_prepare(job);
273	if (ret)
274	goto err;
275
276	/ Make sure the job appears on the sched_entity's queue in the same*
277	* order as it was submitted.
278	*/
279	mutex_lock(&entity->mutex);
280
281	/ Guarantee we won't fail after the submit() callback returned*
282	* successfully.
283	*/
284	if (job->ops->submit) {
285	ret = job->ops->submit(job);
286	if (ret)
287	goto err_cleanup;
288	}
289
290	drm_sched_job_arm(job: &job->base);
291	job->done_fence = dma_fence_get(fence: &job->base.s_fence->finished);
292	if (job->sync)
293	done_fence = dma_fence_get(fence: job->done_fence);
294
295	/ If a sched job depends on a dma-fence from a job from the same GPU*
296	* scheduler instance, but a different scheduler entity, the GPU
297	* scheduler does only wait for the particular job to be scheduled,
298	* rather than for the job to fully complete. This is due to the GPU
299	* scheduler assuming that there is a scheduler instance per ring.
300	* However, the current implementation, in order to avoid arbitrary
301	* amounts of kthreads, has a single scheduler instance while scheduler
302	* entities represent rings.
303	*
304	* As a workaround, set the DRM_SCHED_FENCE_DONT_PIPELINE for all
305	* out-fences in order to force the scheduler to wait for full job
306	* completion for dependent jobs from different entities and same
307	* scheduler instance.
308	*
309	* There is some work in progress [1] to address the issues of firmware
310	* schedulers; once it is in-tree the scheduler topology in Nouveau
311	* should be re-worked accordingly.
312	*
313	* [1] https://lore.kernel.org/dri-devel/20230801205103.627779-1-matthew.brost@intel.com/
314	*/
315	set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, addr: &job->done_fence->flags);
316
317	if (job->ops->armed_submit)
318	job->ops->armed_submit(job);
319
320	nouveau_job_fence_attach(job);
321
322	/ Set job state before pushing the job to the scheduler,*
323	* such that we do not overwrite the job state set in run().
324	*/
325	job->state = NOUVEAU_JOB_SUBMIT_SUCCESS;
326
327	drm_sched_entity_push_job(sched_job: &job->base);
328
329	mutex_unlock(lock: &entity->mutex);
330
331	if (done_fence) {
332	dma_fence_wait(fence: done_fence, intr: true);
333	dma_fence_put(fence: done_fence);
334	}
335
336	return `0`;
337
338	err_cleanup:
339	mutex_unlock(lock: &entity->mutex);
340	nouveau_job_fence_attach_cleanup(job);
341	err:
342	job->state = NOUVEAU_JOB_SUBMIT_FAILED;
343	return ret;
344	}
345
346	bool
347	nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity,
348	struct work_struct *work)
349	{
350	return queue_work(wq: entity->sched_wq, work);
351	}
352
353	static struct dma_fence *
354	nouveau_job_run(struct nouveau_job *job)
355	{
356	struct dma_fence *fence;
357
358	fence = job->ops->run(job);
359	if (IS_ERR(ptr: fence))
360	job->state = NOUVEAU_JOB_RUN_FAILED;
361	else
362	job->state = NOUVEAU_JOB_RUN_SUCCESS;
363
364	return fence;
365	}
366
/* Scheduler run_job hook: map the scheduler job to its nouveau job and run it. */
static struct dma_fence *
nouveau_sched_run_job(struct drm_sched_job *sched_job)
{
	return nouveau_job_run(to_nouveau_job(sched_job));
}
374
375	static enum drm_gpu_sched_stat
376	nouveau_sched_timedout_job(struct drm_sched_job *sched_job)
377	{
378	struct drm_gpu_scheduler *sched = sched_job->sched;
379	struct nouveau_job *job = to_nouveau_job(sched_job);
380	enum drm_gpu_sched_stat stat = DRM_GPU_SCHED_STAT_NOMINAL;
381
382	drm_sched_stop(sched, bad: sched_job);
383
384	if (job->ops->timeout)
385	stat = job->ops->timeout(job);
386	else
387	NV_PRINTK(warn, job->cli, "Generic job timeout.\n");
388
389	drm_sched_start(sched, full_recovery: true);
390
391	return stat;
392	}
393
/* Scheduler free_job hook: finalize the nouveau job wrapping @sched_job. */
static void
nouveau_sched_free_job(struct drm_sched_job *sched_job)
{
	nouveau_job_fini(to_nouveau_job(sched_job));
}
401
402	int nouveau_sched_entity_init(struct nouveau_sched_entity *entity,
403	struct drm_gpu_scheduler *sched,
404	struct workqueue_struct *sched_wq)
405	{
406	mutex_init(&entity->mutex);
407	spin_lock_init(&entity->job.list.lock);
408	INIT_LIST_HEAD(list: &entity->job.list.head);
409	init_waitqueue_head(&entity->job.wq);
410
411	entity->sched_wq = sched_wq;
412	return drm_sched_entity_init(entity: &entity->base,
413	priority: DRM_SCHED_PRIORITY_NORMAL,
414	sched_list: &sched, num_sched_list: `1`, NULL);
415	}
416
417	void
418	nouveau_sched_entity_fini(struct nouveau_sched_entity *entity)
419	{
420	drm_sched_entity_destroy(entity: &entity->base);
421	}
422
423	static const struct drm_sched_backend_ops nouveau_sched_ops = {
424	.run_job = nouveau_sched_run_job,
425	.timedout_job = nouveau_sched_timedout_job,
426	.free_job = nouveau_sched_free_job,
427	};
428
429	int nouveau_sched_init(struct nouveau_drm *drm)
430	{
431	struct drm_gpu_scheduler *sched = &drm->sched;
432	long job_hang_limit = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS);
433
434	drm->sched_wq = create_singlethread_workqueue("nouveau_sched_wq");
435	if (!drm->sched_wq)
436	return -ENOMEM;
437
438	return drm_sched_init(sched, ops: &nouveau_sched_ops,
439	num_rqs: DRM_SCHED_PRIORITY_COUNT,
440	NOUVEAU_SCHED_HW_SUBMISSIONS, hang_limit: `0`, timeout: job_hang_limit,
441	NULL, NULL, name: "nouveau_sched", dev: drm->dev->dev);
442	}
443
444	void nouveau_sched_fini(struct nouveau_drm *drm)
445	{
446	destroy_workqueue(wq: drm->sched_wq);
447	drm_sched_fini(sched: &drm->sched);
448	}
449

source code of linux/drivers/gpu/drm/nouveau/nouveau_sched.c