// SPDX-License-Identifier: MIT

#include <drm/drm_exec.h>

#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_chan.h"
#include "nouveau_sched.h"
#include "nouveau_uvmm.h"

/**
 * DOC: Overview
 *
 * Nouveau's VM_BIND / EXEC UAPI consists of three ioctls: DRM_NOUVEAU_VM_INIT,
 * DRM_NOUVEAU_VM_BIND and DRM_NOUVEAU_EXEC.
 *
 * In order to use the UAPI, a user client must first initialize the VA space
 * using the DRM_NOUVEAU_VM_INIT ioctl, specifying which region of the VA space
 * should be managed by the kernel and which by the UMD.
 *
 * The DRM_NOUVEAU_VM_BIND ioctl provides clients an interface to manage the
 * userspace-manageable portion of the VA space. It provides operations to map
 * and unmap memory. Mappings may be flagged as sparse. Sparse mappings are not
 * backed by a GEM object and the kernel will ignore GEM handles provided
 * alongside a sparse mapping.
 *
 * Userspace may request memory backed mappings either within or outside of the
 * bounds (but not crossing those bounds) of a previously mapped sparse
 * mapping. Subsequently requested memory backed mappings within a sparse
 * mapping will take precedence over the corresponding range of the sparse
 * mapping. If such memory backed mappings are unmapped, the kernel will make
 * sure that the corresponding sparse mapping takes their place again.
 * Requests to unmap a sparse mapping that still contains memory backed
 * mappings will result in those memory backed mappings being unmapped first.
 *
 * Unmap requests are not bound to the range of existing mappings and can even
 * overlap the bounds of sparse mappings. For such a request the kernel will
 * make sure to unmap all memory backed mappings within the given range,
 * splitting up memory backed mappings which are only partially contained
 * within the given range. Unmap requests with the sparse flag set, however,
 * must match the range of a previously mapped sparse mapping exactly.
 *
 * While the kernel generally permits arbitrary sequences and ranges of memory
 * backed mappings being mapped and unmapped, either within a single or
 * multiple VM_BIND ioctl calls, there are some restrictions for sparse
 * mappings.
 *
 * The kernel does not permit to:
 *   - unmap non-existent sparse mappings
 *   - unmap a sparse mapping and map a new sparse mapping overlapping the range
 *     of the previously unmapped sparse mapping within the same VM_BIND ioctl
 *   - unmap a sparse mapping and map new memory backed mappings overlapping the
 *     range of the previously unmapped sparse mapping within the same VM_BIND
 *     ioctl
 *
 * When using the VM_BIND ioctl to request the kernel to map memory to a given
 * virtual address in the GPU's VA space, there is no guarantee that the actual
 * mappings are created in the GPU's MMU. If the given memory is swapped out at
 * the time the bind operation is executed, the kernel will stash the mapping
 * details in its internal allocator and create the actual MMU mappings once
 * the memory is swapped back in. While this is transparent to userspace, it is
 * guaranteed that all the backing memory is swapped back in and all the memory
 * mappings, as requested by userspace previously, are actually mapped once the
 * DRM_NOUVEAU_EXEC ioctl is called to submit an exec job.
 *
 * A VM_BIND job can be executed either synchronously or asynchronously. If
 * executed asynchronously, userspace may provide a list of syncobjs this job
 * will wait for and/or a list of syncobjs the kernel will signal once the
 * VM_BIND job finished execution. If executed synchronously, the ioctl will
 * block until the bind job is finished. For synchronous jobs the kernel will
 * not permit any syncobjs to be submitted.
 *
 * To execute a push buffer, the UAPI provides the DRM_NOUVEAU_EXEC ioctl. EXEC
 * jobs are always executed asynchronously and, like VM_BIND jobs, provide the
 * option to synchronize them with syncobjs.
 *
 * In addition, EXEC jobs can be scheduled for a specified channel to execute
 * on.
 *
 * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs have
 * an up to date view of the VA space. However, the actual mappings might still
 * be pending. Hence, EXEC jobs require the fences of the corresponding VM_BIND
 * jobs they depend on to be attached to them.
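 *
 * A rough sketch of the resulting userspace flow, with error handling and
 * syncobj setup omitted. The struct and field names follow the
 * include/uapi/drm/nouveau_drm.h UAPI header; bo_handle, bo_size, pushbuf_len,
 * channel_id and the virtual addresses are placeholders::
 *
 *	struct drm_nouveau_vm_init init = {
 *		.kernel_managed_addr = 0,
 *		.kernel_managed_size = 1ULL << 30,
 *	};
 *	struct drm_nouveau_vm_bind_op op = {
 *		.op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *		.handle = bo_handle,
 *		.addr = 1ULL << 32,
 *		.bo_offset = 0,
 *		.range = bo_size,
 *	};
 *	struct drm_nouveau_vm_bind bind = {
 *		.op_count = 1,
 *		.op_ptr = (uintptr_t)&op,
 *	};
 *	struct drm_nouveau_exec_push push = {
 *		.va = 1ULL << 32,
 *		.va_len = pushbuf_len,
 *	};
 *	struct drm_nouveau_exec exec = {
 *		.channel = channel_id,
 *		.push_count = 1,
 *		.push_ptr = (uintptr_t)&push,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_INIT, &init);
 *	drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_BIND, &bind);
 *	drmIoctl(fd, DRM_IOCTL_NOUVEAU_EXEC, &exec);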
 */
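/*
 * Job submit callback: lock every GEM object currently mapped in the client's
 * VA space, reserve a fence slot on each and validate their backing storage,
 * so the push buffers only execute once all requested mappings are in place.
 */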
static int
nouveau_exec_job_submit(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_cli *cli = job->cli;
	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
	struct drm_exec *exec = &job->exec;
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	/* Create a new fence, but do not emit yet. */
	ret = nouveau_fence_create(&exec_job->fence, exec_job->chan);
	if (ret)
		return ret;

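	/*
	 * Lock every GEM object backing a mapping in this VM (skipping the
	 * kernel-reserved allocation node) and reserve one dma-fence slot on
	 * each; drm_exec retries the whole loop on contention.
	 */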
	nouveau_uvmm_lock(uvmm);
	drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
			    DRM_EXEC_IGNORE_DUPLICATES);
	drm_exec_until_all_locked(exec) {
		struct drm_gpuva *va;

		drm_gpuvm_for_each_va(va, &uvmm->base) {
			if (unlikely(va == &uvmm->base.kernel_alloc_node))
				continue;

			ret = drm_exec_prepare_obj(exec, va->gem.obj, 1);
			drm_exec_retry_on_contention(exec);
			if (ret)
				goto err_uvmm_unlock;
		}
	}
	nouveau_uvmm_unlock(uvmm);

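	/*
	 * With all objects locked, make sure their backing storage is
	 * populated and mapped before the job is queued.
	 */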
	drm_exec_for_each_locked_object(exec, index, obj) {
		struct nouveau_bo *nvbo = nouveau_gem_object(obj);

		ret = nouveau_bo_validate(nvbo, true, false);
		if (ret)
			goto err_exec_fini;
	}

	return 0;

err_uvmm_unlock:
	nouveau_uvmm_unlock(uvmm);
err_exec_fini:
	drm_exec_fini(exec);
	return ret;
}

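/*
 * Called once the job is armed: attach the job's done fence to the reservation
 * object of every GEM object locked during submit and drop the drm_exec locks.
 */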
static void
nouveau_exec_job_armed_submit(struct nouveau_job *job)
{
	struct drm_exec *exec = &job->exec;
	struct drm_gem_object *obj;
	unsigned long index;

	drm_exec_for_each_locked_object(exec, index, obj)
		dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage);

	drm_exec_fini(exec);
}

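/*
 * Run callback, executed by the DRM GPU scheduler: wait for ring space, push
 * the userspace-provided push buffers onto the channel and emit the job's
 * hardware fence.
 */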
static struct dma_fence *
nouveau_exec_job_run(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;
	struct nouveau_fence *fence = exec_job->fence;
	int i, ret;

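	/* One ring slot per push buffer entry plus one for the fence emit. */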
	ret = nouveau_dma_wait(chan, exec_job->push.count + 1, 16);
	if (ret) {
		NV_PRINTK(err, job->cli, "nv50cal_space: %d\n", ret);
		return ERR_PTR(ret);
	}

	for (i = 0; i < exec_job->push.count; i++) {
		struct drm_nouveau_exec_push *p = &exec_job->push.s[i];
		bool no_prefetch = p->flags & DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;

		nv50_dma_push(chan, p->va, p->va_len, no_prefetch);
	}

	ret = nouveau_fence_emit(fence);
	if (ret) {
		nouveau_fence_unref(&exec_job->fence);
		NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
		WIND_RING(chan);
		return ERR_PTR(ret);
	}

	/* The fence was emitted successfully, set the job's fence pointer to
	 * NULL in order to avoid freeing it up when the job is cleaned up.
	 */
	exec_job->fence = NULL;

	return &fence->base;
}

static void
nouveau_exec_job_free(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);

	nouveau_job_free(job);

	kfree(exec_job->fence);
	kfree(exec_job->push.s);
	kfree(exec_job);
}

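/*
 * Timeout callback: the channel is considered stuck, so kill it (if not
 * already dead) and finish the scheduler entity so no further jobs are pushed
 * to it.
 */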
static enum drm_gpu_sched_stat
nouveau_exec_job_timeout(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;

	if (unlikely(!atomic_read(&chan->killed)))
		nouveau_channel_kill(chan);

	NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
		  chan->chid);

	nouveau_sched_entity_fini(job->entity);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static struct nouveau_job_ops nouveau_exec_job_ops = {
	.submit = nouveau_exec_job_submit,
	.armed_submit = nouveau_exec_job_armed_submit,
	.run = nouveau_exec_job_run,
	.free = nouveau_exec_job_free,
	.timeout = nouveau_exec_job_timeout,
};

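/*
 * Allocate and initialize an EXEC job from the ioctl arguments: validate the
 * push buffer entries, copy them into the job and hook the job up to the
 * scheduler entity and the in/out sync objects.
 */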
int
nouveau_exec_job_init(struct nouveau_exec_job **pjob,
		      struct nouveau_exec_job_args *__args)
{
	struct nouveau_exec_job *job;
	struct nouveau_job_args args = {};
	int i, ret;

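	/*
	 * Reject push buffer entries exceeding the hardware's maximum IB entry
	 * length before allocating anything.
	 */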
	for (i = 0; i < __args->push.count; i++) {
		struct drm_nouveau_exec_push *p = &__args->push.s[i];

		if (unlikely(p->va_len > NV50_DMA_PUSH_MAX_LENGTH)) {
			NV_PRINTK(err, nouveau_cli(__args->file_priv),
				  "pushbuf size exceeds limit: 0x%x max 0x%x\n",
				  p->va_len, NV50_DMA_PUSH_MAX_LENGTH);
			return -EINVAL;
		}
	}

	job = *pjob = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job)
		return -ENOMEM;

	job->push.count = __args->push.count;
	if (__args->push.count) {
		job->push.s = kmemdup(__args->push.s,
				      sizeof(*__args->push.s) *
				      __args->push.count,
				      GFP_KERNEL);
		if (!job->push.s) {
			ret = -ENOMEM;
			goto err_free_job;
		}
	}

	job->chan = __args->chan;

	args.sched_entity = __args->sched_entity;
	args.file_priv = __args->file_priv;

	args.in_sync.count = __args->in_sync.count;
	args.in_sync.s = __args->in_sync.s;

	args.out_sync.count = __args->out_sync.count;
	args.out_sync.s = __args->out_sync.s;

	args.ops = &nouveau_exec_job_ops;
	args.resv_usage = DMA_RESV_USAGE_WRITE;

	ret = nouveau_job_init(&job->base, &args);
	if (ret)
		goto err_free_pushs;

	return 0;

err_free_pushs:
	kfree(job->push.s);
err_free_job:
	kfree(job);
	*pjob = NULL;

	return ret;
}

static int
nouveau_exec(struct nouveau_exec_job_args *args)
{
	struct nouveau_exec_job *job;
	int ret;

	ret = nouveau_exec_job_init(&job, args);
	if (ret)
		return ret;

	ret = nouveau_job_submit(&job->base);
	if (ret)
		goto err_job_fini;

	return 0;

err_job_fini:
	nouveau_job_fini(&job->base);
	return ret;
}

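/*
 * Copy the userspace arrays referenced by the ioctl request (push buffer
 * entries, wait and signal syncobjs) into kernel memory.
 */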
static int
nouveau_exec_ucopy(struct nouveau_exec_job_args *args,
		   struct drm_nouveau_exec *req)
{
	struct drm_nouveau_sync **s;
	u32 inc = req->wait_count;
	u64 ins = req->wait_ptr;
	u32 outc = req->sig_count;
	u64 outs = req->sig_ptr;
	u32 pushc = req->push_count;
	u64 pushs = req->push_ptr;
	int ret;

	if (pushc) {
		args->push.count = pushc;
		args->push.s = u_memcpya(pushs, pushc, sizeof(*args->push.s));
		if (IS_ERR(args->push.s))
			return PTR_ERR(args->push.s);
	}

	if (inc) {
		s = &args->in_sync.s;

		args->in_sync.count = inc;
		*s = u_memcpya(ins, inc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_pushs;
		}
	}

	if (outc) {
		s = &args->out_sync.s;

		args->out_sync.count = outc;
		*s = u_memcpya(outs, outc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_ins;
		}
	}

	return 0;

err_free_ins:
	u_free(args->in_sync.s);
err_free_pushs:
	u_free(args->push.s);
	return ret;
}

static void
nouveau_exec_ufree(struct nouveau_exec_job_args *args)
{
	u_free(args->push.s);
	u_free(args->in_sync.s);
	u_free(args->out_sync.s);
}

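/*
 * DRM_NOUVEAU_EXEC ioctl entry point: look up the target channel, perform
 * basic sanity checks, copy in the userspace arrays and submit the EXEC job.
 */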
int
nouveau_exec_ioctl_exec(struct drm_device *dev,
			void *data,
			struct drm_file *file_priv)
{
	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
	struct nouveau_cli *cli = nouveau_cli(file_priv);
	struct nouveau_abi16_chan *chan16;
	struct nouveau_channel *chan = NULL;
	struct nouveau_exec_job_args args = {};
	struct drm_nouveau_exec *req = data;
	int push_max, ret = 0;

	if (unlikely(!abi16))
		return -ENOMEM;

	/* abi16 locks already */
	if (unlikely(!nouveau_cli_uvmm(cli)))
		return nouveau_abi16_put(abi16, -ENOSYS);

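	/*
	 * Find the channel the request targets among the channels created by
	 * this client.
	 */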
	list_for_each_entry(chan16, &abi16->channels, head) {
		if (chan16->chan->chid == req->channel) {
			chan = chan16->chan;
			break;
		}
	}

	if (!chan)
		return nouveau_abi16_put(abi16, -ENOENT);

	if (unlikely(atomic_read(&chan->killed)))
		return nouveau_abi16_put(abi16, -ENODEV);

	if (!chan->dma.ib_max)
		return nouveau_abi16_put(abi16, -ENOSYS);

	push_max = nouveau_exec_push_max_from_ib_max(chan->dma.ib_max);
	if (unlikely(req->push_count > push_max)) {
		NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n",
			  req->push_count, push_max);
		return nouveau_abi16_put(abi16, -EINVAL);
	}

	ret = nouveau_exec_ucopy(&args, req);
	if (ret)
		goto out;

	args.sched_entity = &chan16->sched_entity;
	args.file_priv = file_priv;
	args.chan = chan;

	ret = nouveau_exec(&args);
	if (ret)
		goto out_free_args;

out_free_args:
	nouveau_exec_ufree(&args);
out:
	return nouveau_abi16_put(abi16, ret);
}