// SPDX-License-Identifier: MIT

#include <drm/drm_exec.h>

#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_chan.h"
#include "nouveau_sched.h"
#include "nouveau_uvmm.h"

/**
 * DOC: Overview
 *
 * Nouveau's VM_BIND / EXEC UAPI consists of three ioctls: DRM_NOUVEAU_VM_INIT,
 * DRM_NOUVEAU_VM_BIND and DRM_NOUVEAU_EXEC.
 *
 * To use the UAPI, a user client must first initialize the VA space with the
 * DRM_NOUVEAU_VM_INIT ioctl, specifying which region of the VA space should be
 * managed by the kernel and which by the UMD.
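 *
 * A minimal userspace sketch of such an initialization, assuming the UAPI
 * structures and ioctl numbers from include/uapi/drm/nouveau_drm.h and
 * libdrm's drmIoctl(); the split address and size are purely illustrative:
 *
 *    struct drm_nouveau_vm_init init = {
 *            // Reserve the low 4 GiB for kernel managed mappings; the rest
 *            // of the VA space is left to the UMD.
 *            .kernel_managed_addr = 0x0,
 *            .kernel_managed_size = 0x100000000,
 *    };
 *
 *    if (drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_INIT, &init))
 *            return -errno;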
 *
 * The DRM_NOUVEAU_VM_BIND ioctl provides clients an interface to manage the
 * userspace-manageable portion of the VA space. It provides operations to map
 * and unmap memory. Mappings may be flagged as sparse. Sparse mappings are not
 * backed by a GEM object and the kernel will ignore GEM handles provided
 * alongside a sparse mapping.
 *
 * Userspace may request memory backed mappings either within or outside of the
 * bounds (but not crossing those bounds) of a previously mapped sparse
 * mapping. Subsequently requested memory backed mappings within a sparse
 * mapping will take precedence over the corresponding range of the sparse
 * mapping. If such memory backed mappings are unmapped the kernel will make
 * sure that the corresponding sparse mapping takes their place again.
 * Requests to unmap a sparse mapping that still contains memory backed
 * mappings will result in those memory backed mappings being unmapped first.
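 *
 * For instance, a sparse region with a single memory backed mapping placed
 * inside of it could be set up as sketched below. The op and flag names are
 * assumed to match include/uapi/drm/nouveau_drm.h; the addresses, sizes and
 * the GEM handle are illustrative only:
 *
 *    struct drm_nouveau_vm_bind_op ops[] = {
 *            { // 128 MiB sparse region, not backed by a GEM object.
 *                    .op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *                    .flags = DRM_NOUVEAU_VM_BIND_SPARSE,
 *                    .addr = 0x100000000,
 *                    .range = 0x8000000,
 *            },
 *            { // 1 MiB memory backed mapping inside the sparse region.
 *                    .op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *                    .handle = bo_handle,
 *                    .addr = 0x100000000,
 *                    .bo_offset = 0,
 *                    .range = 0x100000,
 *            },
 *    };
 *    struct drm_nouveau_vm_bind bind = {
 *            .op_count = 2,
 *            .op_ptr = (uintptr_t)ops,
 *    };
 *
 *    if (drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_BIND, &bind))
 *            return -errno;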
 *
 * Unmap requests are not bound to the range of existing mappings and can even
 * overlap the bounds of sparse mappings. For such a request the kernel will
 * make sure to unmap all memory backed mappings within the given range,
 * splitting up memory backed mappings which are only partially contained
 * within the given range. Unmap requests with the sparse flag set, however,
 * must exactly match the range of a previously mapped sparse mapping.
 *
 * While the kernel generally permits arbitrary sequences and ranges of memory
 * backed mappings being mapped and unmapped, either within a single or
 * multiple VM_BIND ioctl calls, there are some restrictions for sparse
 * mappings.
 *
 * The kernel does not permit:
 * - unmapping non-existent sparse mappings
 * - unmapping a sparse mapping and mapping a new sparse mapping overlapping
 *   the range of the previously unmapped sparse mapping within the same
 *   VM_BIND ioctl
 * - unmapping a sparse mapping and mapping new memory backed mappings
 *   overlapping the range of the previously unmapped sparse mapping within
 *   the same VM_BIND ioctl
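 *
 * As a sketch of the unmap semantics described above (again assuming the op
 * layout from include/uapi/drm/nouveau_drm.h, with illustrative addresses), a
 * plain unmap op may cover an arbitrary range, while a sparse unmap must
 * match the original sparse mapping exactly:
 *
 *    struct drm_nouveau_vm_bind_op unmap_ops[] = {
 *            { // Unmaps (and splits) any memory backed mappings in range.
 *                    .op = DRM_NOUVEAU_VM_BIND_OP_UNMAP,
 *                    .addr = 0x100080000,
 *                    .range = 0x40000,
 *            },
 *            { // Must exactly match a previously mapped sparse mapping.
 *                    .op = DRM_NOUVEAU_VM_BIND_OP_UNMAP,
 *                    .flags = DRM_NOUVEAU_VM_BIND_SPARSE,
 *                    .addr = 0x100000000,
 *                    .range = 0x8000000,
 *            },
 *    };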
 *
 * When using the VM_BIND ioctl to request the kernel to map memory to a given
 * virtual address in the GPU's VA space there is no guarantee that the actual
 * mappings are created in the GPU's MMU. If the given memory is swapped out
 * at the time the bind operation is executed the kernel will stash the mapping
 * details into its internal allocator and create the actual MMU mappings once
 * the memory is swapped back in. While this is transparent to userspace, it is
 * guaranteed that all the backing memory is swapped back in and all the memory
 * mappings, as requested by userspace previously, are actually mapped once the
 * DRM_NOUVEAU_EXEC ioctl is called to submit an exec job.
 *
 * A VM_BIND job can be executed either synchronously or asynchronously. If
 * executed asynchronously, userspace may provide a list of syncobjs this job
 * will wait for and/or a list of syncobjs the kernel will signal once the
 * VM_BIND job has finished execution. If executed synchronously the ioctl will
 * block until the bind job is finished. For synchronous jobs the kernel does
 * not permit any syncobjs to be submitted.
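 *
 * As a sketch (with the same UAPI layout assumptions as above and a
 * hypothetical syncobj handle), an asynchronous bind that signals a syncobj
 * on completion might be submitted like this:
 *
 *    struct drm_nouveau_sync sig = {
 *            .flags = DRM_NOUVEAU_SYNC_SYNCOBJ,
 *            .handle = bind_syncobj_handle,
 *    };
 *    struct drm_nouveau_vm_bind bind = {
 *            .flags = DRM_NOUVEAU_VM_BIND_RUN_ASYNC,
 *            .op_count = 2,
 *            .op_ptr = (uintptr_t)ops,
 *            .sig_count = 1,
 *            .sig_ptr = (uintptr_t)&sig,
 *    };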
 *
 * To execute a push buffer the UAPI provides the DRM_NOUVEAU_EXEC ioctl. EXEC
 * jobs are always executed asynchronously, and, like VM_BIND jobs, provide
 * the option to synchronize them with syncobjs.
 *
 * Besides that, EXEC jobs can be scheduled for a specified channel to execute
 * on.
 *
 * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs do
 * have an up to date view of the VA space. However, the actual mappings might
 * still be pending. Hence, EXEC jobs require the fences of the corresponding
 * VM_BIND jobs they depend on to be attached to them.
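 *
 * A sketch of an EXEC submission, using the push buffer fields referenced by
 * this file (va, va_len, flags) and a hypothetical VM_BIND out-syncobj as the
 * wait dependency; the ioctl number and sync struct layout are assumed to
 * match include/uapi/drm/nouveau_drm.h:
 *
 *    struct drm_nouveau_exec_push push = {
 *            .va = pushbuf_va,       // GPU VA of the push buffer
 *            .va_len = pushbuf_len,  // length in bytes
 *    };
 *    struct drm_nouveau_sync wait = {
 *            .flags = DRM_NOUVEAU_SYNC_SYNCOBJ,
 *            .handle = bind_syncobj_handle,
 *    };
 *    struct drm_nouveau_exec exec = {
 *            .channel = channel_id,
 *            .push_count = 1,
 *            .push_ptr = (uintptr_t)&push,
 *            .wait_count = 1,
 *            .wait_ptr = (uintptr_t)&wait,
 *    };
 *
 *    if (drmIoctl(fd, DRM_IOCTL_NOUVEAU_EXEC, &exec))
 *            return -errno;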
 */

static int
nouveau_exec_job_submit(struct nouveau_job *job)
{
        struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
        struct nouveau_cli *cli = job->cli;
        struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
        struct drm_exec *exec = &job->exec;
        struct drm_gem_object *obj;
        unsigned long index;
        int ret;

        /* Create a new fence, but do not emit yet. */
        ret = nouveau_fence_create(&exec_job->fence, exec_job->chan);
        if (ret)
                return ret;

        /* Lock the GEM objects backing all VA mappings of this VM and reserve
         * a fence slot for each of them.
         */
        nouveau_uvmm_lock(uvmm);
        drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
                            DRM_EXEC_IGNORE_DUPLICATES);
        drm_exec_until_all_locked(exec) {
                struct drm_gpuva *va;

                drm_gpuvm_for_each_va(va, &uvmm->base) {
                        if (unlikely(va == &uvmm->base.kernel_alloc_node))
                                continue;

                        ret = drm_exec_prepare_obj(exec, va->gem.obj, 1);
                        drm_exec_retry_on_contention(exec);
                        if (ret)
                                goto err_uvmm_unlock;
                }
        }
        nouveau_uvmm_unlock(uvmm);

        /* Make sure all locked BOs are resident before the job runs. */
        drm_exec_for_each_locked_object(exec, index, obj) {
                struct nouveau_bo *nvbo = nouveau_gem_object(obj);

                ret = nouveau_bo_validate(nvbo, true, false);
                if (ret)
                        goto err_exec_fini;
        }

        return 0;

err_uvmm_unlock:
        nouveau_uvmm_unlock(uvmm);
err_exec_fini:
        drm_exec_fini(exec);
        return ret;
}

static void
nouveau_exec_job_armed_submit(struct nouveau_job *job)
{
        struct drm_exec *exec = &job->exec;
        struct drm_gem_object *obj;
        unsigned long index;

        /* Attach the job's done fence to all locked BOs, then drop the locks. */
        drm_exec_for_each_locked_object(exec, index, obj)
                dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage);

        drm_exec_fini(exec);
}

static struct dma_fence *
nouveau_exec_job_run(struct nouveau_job *job)
{
        struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
        struct nouveau_channel *chan = exec_job->chan;
        struct nouveau_fence *fence = exec_job->fence;
        int i, ret;

        /* Wait for enough ring space for all push buffers plus the fence. */
        ret = nouveau_dma_wait(chan, exec_job->push.count + 1, 16);
        if (ret) {
                NV_PRINTK(err, job->cli, "nv50cal_space: %d\n", ret);
                return ERR_PTR(ret);
        }

        for (i = 0; i < exec_job->push.count; i++) {
                struct drm_nouveau_exec_push *p = &exec_job->push.s[i];
                bool no_prefetch = p->flags & DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;

                nv50_dma_push(chan, p->va, p->va_len, no_prefetch);
        }

        ret = nouveau_fence_emit(fence);
        if (ret) {
                nouveau_fence_unref(&exec_job->fence);
                NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
                WIND_RING(chan);
                return ERR_PTR(ret);
        }

        /* The fence was emitted successfully, set the job's fence pointer to
         * NULL in order to avoid freeing it up when the job is cleaned up.
         */
        exec_job->fence = NULL;

        return &fence->base;
}

static void
nouveau_exec_job_free(struct nouveau_job *job)
{
        struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);

        nouveau_job_free(job);

        kfree(exec_job->fence);
        kfree(exec_job->push.s);
        kfree(exec_job);
}

static enum drm_gpu_sched_stat
nouveau_exec_job_timeout(struct nouveau_job *job)
{
        struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
        struct nouveau_channel *chan = exec_job->chan;

        if (unlikely(!atomic_read(&chan->killed)))
                nouveau_channel_kill(chan);

        NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
                  chan->chid);

        nouveau_sched_entity_fini(job->entity);

        return DRM_GPU_SCHED_STAT_NOMINAL;
}

static struct nouveau_job_ops nouveau_exec_job_ops = {
        .submit = nouveau_exec_job_submit,
        .armed_submit = nouveau_exec_job_armed_submit,
        .run = nouveau_exec_job_run,
        .free = nouveau_exec_job_free,
        .timeout = nouveau_exec_job_timeout,
};

int
nouveau_exec_job_init(struct nouveau_exec_job **pjob,
                      struct nouveau_exec_job_args *__args)
{
        struct nouveau_exec_job *job;
        struct nouveau_job_args args = {};
        int i, ret;

        for (i = 0; i < __args->push.count; i++) {
                struct drm_nouveau_exec_push *p = &__args->push.s[i];

                if (unlikely(p->va_len > NV50_DMA_PUSH_MAX_LENGTH)) {
                        NV_PRINTK(err, nouveau_cli(__args->file_priv),
                                  "pushbuf size exceeds limit: 0x%x max 0x%x\n",
                                  p->va_len, NV50_DMA_PUSH_MAX_LENGTH);
                        return -EINVAL;
                }
        }

        job = *pjob = kzalloc(sizeof(*job), GFP_KERNEL);
        if (!job)
                return -ENOMEM;

        job->push.count = __args->push.count;
        if (__args->push.count) {
                job->push.s = kmemdup(__args->push.s,
                                      sizeof(*__args->push.s) *
                                      __args->push.count,
                                      GFP_KERNEL);
                if (!job->push.s) {
                        ret = -ENOMEM;
                        goto err_free_job;
                }
        }

        job->chan = __args->chan;

        args.sched_entity = __args->sched_entity;
        args.file_priv = __args->file_priv;

        args.in_sync.count = __args->in_sync.count;
        args.in_sync.s = __args->in_sync.s;

        args.out_sync.count = __args->out_sync.count;
        args.out_sync.s = __args->out_sync.s;

        args.ops = &nouveau_exec_job_ops;
        args.resv_usage = DMA_RESV_USAGE_WRITE;

        ret = nouveau_job_init(&job->base, &args);
        if (ret)
                goto err_free_pushs;

        return 0;

err_free_pushs:
        kfree(job->push.s);
err_free_job:
        kfree(job);
        *pjob = NULL;

        return ret;
}

static int
nouveau_exec(struct nouveau_exec_job_args *args)
{
        struct nouveau_exec_job *job;
        int ret;

        ret = nouveau_exec_job_init(&job, args);
        if (ret)
                return ret;

        ret = nouveau_job_submit(&job->base);
        if (ret)
                goto err_job_fini;

        return 0;

err_job_fini:
        nouveau_job_fini(&job->base);
        return ret;
}

static int
nouveau_exec_ucopy(struct nouveau_exec_job_args *args,
                   struct drm_nouveau_exec *req)
{
        struct drm_nouveau_sync **s;
        u32 inc = req->wait_count;
        u64 ins = req->wait_ptr;
        u32 outc = req->sig_count;
        u64 outs = req->sig_ptr;
        u32 pushc = req->push_count;
        u64 pushs = req->push_ptr;
        int ret;

        if (pushc) {
                args->push.count = pushc;
                args->push.s = u_memcpya(pushs, pushc, sizeof(*args->push.s));
                if (IS_ERR(args->push.s))
                        return PTR_ERR(args->push.s);
        }

        if (inc) {
                s = &args->in_sync.s;

                args->in_sync.count = inc;
                *s = u_memcpya(ins, inc, sizeof(**s));
                if (IS_ERR(*s)) {
                        ret = PTR_ERR(*s);
                        goto err_free_pushs;
                }
        }

        if (outc) {
                s = &args->out_sync.s;

                args->out_sync.count = outc;
                *s = u_memcpya(outs, outc, sizeof(**s));
                if (IS_ERR(*s)) {
                        ret = PTR_ERR(*s);
                        goto err_free_ins;
                }
        }

        return 0;

        /* Unwind in reverse order, so that a failed out_sync copy also frees
         * the previously copied in_sync and push arrays.
         */
err_free_ins:
        u_free(args->in_sync.s);
err_free_pushs:
        u_free(args->push.s);
        return ret;
}

static void
nouveau_exec_ufree(struct nouveau_exec_job_args *args)
{
        u_free(args->push.s);
        u_free(args->in_sync.s);
        u_free(args->out_sync.s);
}

int
nouveau_exec_ioctl_exec(struct drm_device *dev,
                        void *data,
                        struct drm_file *file_priv)
{
        struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
        struct nouveau_cli *cli = nouveau_cli(file_priv);
        struct nouveau_abi16_chan *chan16;
        struct nouveau_channel *chan = NULL;
        struct nouveau_exec_job_args args = {};
        struct drm_nouveau_exec *req = data;
        int push_max, ret = 0;

        if (unlikely(!abi16))
                return -ENOMEM;

        /* abi16 locks already */
        if (unlikely(!nouveau_cli_uvmm(cli)))
                return nouveau_abi16_put(abi16, -ENOSYS);

        /* Look up the channel the job was requested to be scheduled on. */
        list_for_each_entry(chan16, &abi16->channels, head) {
                if (chan16->chan->chid == req->channel) {
                        chan = chan16->chan;
                        break;
                }
        }

        if (!chan)
                return nouveau_abi16_put(abi16, -ENOENT);

        if (unlikely(atomic_read(&chan->killed)))
                return nouveau_abi16_put(abi16, -ENODEV);

        if (!chan->dma.ib_max)
                return nouveau_abi16_put(abi16, -ENOSYS);

        push_max = nouveau_exec_push_max_from_ib_max(chan->dma.ib_max);
        if (unlikely(req->push_count > push_max)) {
                NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n",
                          req->push_count, push_max);
                return nouveau_abi16_put(abi16, -EINVAL);
        }

        ret = nouveau_exec_ucopy(&args, req);
        if (ret)
                goto out;

        args.sched_entity = &chan16->sched_entity;
        args.file_priv = file_priv;
        args.chan = chan;

        ret = nouveau_exec(&args);
        if (ret)
                goto out_free_args;

out_free_args:
        nouveau_exec_ufree(&args);
out:
        return nouveau_abi16_put(abi16, ret);
}