// SPDX-License-Identifier: MIT

#include <drm/drm_exec.h>

#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_chan.h"
#include "nouveau_sched.h"
#include "nouveau_uvmm.h"

/**
 * DOC: Overview
 *
 * Nouveau's VM_BIND / EXEC UAPI consists of three ioctls: DRM_NOUVEAU_VM_INIT,
 * DRM_NOUVEAU_VM_BIND and DRM_NOUVEAU_EXEC.
 *
 * In order to use the UAPI, a user client must first initialize the VA space
 * using the DRM_NOUVEAU_VM_INIT ioctl, specifying which region of the VA space
 * should be managed by the kernel and which by the UMD.
 *
 * The DRM_NOUVEAU_VM_BIND ioctl provides clients an interface to manage the
 * userspace-manageable portion of the VA space. It provides operations to map
 * and unmap memory. Mappings may be flagged as sparse. Sparse mappings are not
 * backed by a GEM object and the kernel will ignore GEM handles provided
 * alongside a sparse mapping.
 *
 * Userspace may request memory backed mappings either within or outside of the
 * bounds (but not crossing those bounds) of a previously mapped sparse
 * mapping. Subsequently requested memory backed mappings within a sparse
 * mapping will take precedence over the corresponding range of the sparse
 * mapping. If such memory backed mappings are unmapped, the kernel will make
 * sure that the corresponding sparse mapping takes their place again.
 * Requests to unmap a sparse mapping that still contains memory backed
 * mappings will result in those memory backed mappings being unmapped first.
 *
 * Unmap requests are not bound to the range of existing mappings and can even
 * overlap the bounds of sparse mappings. For such a request the kernel will
 * make sure to unmap all memory backed mappings within the given range,
 * splitting up memory backed mappings which are only partially contained
 * within the given range. Unmap requests with the sparse flag set, however,
 * must match the range of a previously mapped sparse mapping exactly.
 *
 * While the kernel generally permits arbitrary sequences and ranges of memory
 * backed mappings being mapped and unmapped, either within a single or
 * multiple VM_BIND ioctl calls, there are some restrictions for sparse
 * mappings.
 *
 * The kernel does not permit to:
 *   - unmap non-existent sparse mappings
 *   - unmap a sparse mapping and map a new sparse mapping overlapping the range
 *     of the previously unmapped sparse mapping within the same VM_BIND ioctl
 *   - unmap a sparse mapping and map new memory backed mappings overlapping the
 *     range of the previously unmapped sparse mapping within the same VM_BIND
 *     ioctl
 *
 * When using the VM_BIND ioctl to request the kernel to map memory to a given
 * virtual address in the GPU's VA space, there is no guarantee that the actual
 * mappings are created in the GPU's MMU. If the given memory is swapped out at
 * the time the bind operation is executed, the kernel will stash the mapping
 * details in its internal allocator and create the actual MMU mappings once
 * the memory is swapped back in. While this is transparent to userspace, it is
 * guaranteed that all the backing memory is swapped back in and all the memory
 * mappings, as requested by userspace previously, are actually mapped once the
 * DRM_NOUVEAU_EXEC ioctl is called to submit an exec job.
 *
 * A VM_BIND job can be executed either synchronously or asynchronously. If
 * executed asynchronously, userspace may provide a list of syncobjs this job
 * will wait for and/or a list of syncobjs the kernel will signal once the
 * VM_BIND job finished execution. If executed synchronously, the ioctl will
 * block until the bind job is finished. For synchronous jobs the kernel will
 * not permit any syncobjs to be submitted.
 *
 * To execute a push buffer, the UAPI provides the DRM_NOUVEAU_EXEC ioctl. EXEC
 * jobs are always executed asynchronously and, like VM_BIND jobs, provide the
 * option to synchronize them with syncobjs.
 *
 * In addition, EXEC jobs can be scheduled for a specified channel to execute
 * on.
 *
 * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs have
 * an up to date view of the VA space. However, the actual mappings might still
 * be pending. Hence, EXEC jobs require the fences of the corresponding VM_BIND
 * jobs they depend on to be attached to them.
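 *
 * A rough sketch of the resulting userspace flow, with error handling and
 * syncobj setup omitted. The struct and field names follow the
 * include/uapi/drm/nouveau_drm.h UAPI header; bo_handle, bo_size, pushbuf_len,
 * channel_id and the virtual addresses are placeholders::
 *
 *	struct drm_nouveau_vm_init init = {
 *		.kernel_managed_addr = 0,
 *		.kernel_managed_size = 1ULL << 30,
 *	};
 *	struct drm_nouveau_vm_bind_op op = {
 *		.op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *		.handle = bo_handle,
 *		.addr = 1ULL << 32,
 *		.bo_offset = 0,
 *		.range = bo_size,
 *	};
 *	struct drm_nouveau_vm_bind bind = {
 *		.op_count = 1,
 *		.op_ptr = (uintptr_t)&op,
 *	};
 *	struct drm_nouveau_exec_push push = {
 *		.va = 1ULL << 32,
 *		.va_len = pushbuf_len,
 *	};
 *	struct drm_nouveau_exec exec = {
 *		.channel = channel_id,
 *		.push_count = 1,
 *		.push_ptr = (uintptr_t)&push,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_INIT, &init);
 *	drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_BIND, &bind);
 *	drmIoctl(fd, DRM_IOCTL_NOUVEAU_EXEC, &exec);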
 */
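/*
 * Job submit callback: lock every GEM object currently mapped in the client's
 * VA space, reserve a fence slot on each and validate their backing storage,
 * so the push buffers only execute once all requested mappings are in place.
 */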
static int
nouveau_exec_job_submit(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_cli *cli = job->cli;
	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
	struct drm_exec *exec = &job->exec;
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	/* Create a new fence, but do not emit yet. */
	ret = nouveau_fence_create(&exec_job->fence, exec_job->chan);
	if (ret)
		return ret;

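	/*
	 * Lock every GEM object backing a mapping in this VM (skipping the
	 * kernel-reserved allocation node) and reserve one dma-fence slot on
	 * each; drm_exec retries the whole loop on contention.
	 */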
	nouveau_uvmm_lock(uvmm);
	drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
			    DRM_EXEC_IGNORE_DUPLICATES);
	drm_exec_until_all_locked(exec) {
		struct drm_gpuva *va;

		drm_gpuvm_for_each_va(va, &uvmm->base) {
			if (unlikely(va == &uvmm->base.kernel_alloc_node))
				continue;

			ret = drm_exec_prepare_obj(exec, va->gem.obj, 1);
			drm_exec_retry_on_contention(exec);
			if (ret)
				goto err_uvmm_unlock;
		}
	}
	nouveau_uvmm_unlock(uvmm);

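	/*
	 * With all objects locked, make sure their backing storage is
	 * populated and mapped before the job is queued.
	 */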
	drm_exec_for_each_locked_object(exec, index, obj) {
		struct nouveau_bo *nvbo = nouveau_gem_object(obj);

		ret = nouveau_bo_validate(nvbo, true, false);
		if (ret)
			goto err_exec_fini;
	}

	return 0;

err_uvmm_unlock:
	nouveau_uvmm_unlock(uvmm);
err_exec_fini:
	drm_exec_fini(exec);
	return ret;
}

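/*
 * Called once the job is armed: attach the job's done fence to the reservation
 * object of every GEM object locked during submit and drop the drm_exec locks.
 */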
static void
nouveau_exec_job_armed_submit(struct nouveau_job *job)
{
	struct drm_exec *exec = &job->exec;
	struct drm_gem_object *obj;
	unsigned long index;

	drm_exec_for_each_locked_object(exec, index, obj)
		dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage);

	drm_exec_fini(exec);
}

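/*
 * Run callback, executed by the DRM GPU scheduler: wait for ring space, push
 * the userspace-provided push buffers onto the channel and emit the job's
 * hardware fence.
 */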
static struct dma_fence *
nouveau_exec_job_run(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;
	struct nouveau_fence *fence = exec_job->fence;
	int i, ret;

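	/* One ring slot per push buffer entry plus one for the fence emit. */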
	ret = nouveau_dma_wait(chan, exec_job->push.count + 1, 16);
	if (ret) {
		NV_PRINTK(err, job->cli, "nv50cal_space: %d\n", ret);
		return ERR_PTR(ret);
	}

	for (i = 0; i < exec_job->push.count; i++) {
		struct drm_nouveau_exec_push *p = &exec_job->push.s[i];
		bool no_prefetch = p->flags & DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;

		nv50_dma_push(chan, p->va, p->va_len, no_prefetch);
	}

	ret = nouveau_fence_emit(fence);
	if (ret) {
		nouveau_fence_unref(&exec_job->fence);
		NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
		WIND_RING(chan);
		return ERR_PTR(ret);
	}

	/* The fence was emitted successfully, set the job's fence pointer to
	 * NULL in order to avoid freeing it up when the job is cleaned up.
	 */
	exec_job->fence = NULL;

	return &fence->base;
}

static void
nouveau_exec_job_free(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);

	nouveau_job_free(job);

	kfree(exec_job->fence);
	kfree(exec_job->push.s);
	kfree(exec_job);
}

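/*
 * Timeout callback: the channel is considered stuck, so kill it (if not
 * already dead) and finish the scheduler entity so no further jobs are pushed
 * to it.
 */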
static enum drm_gpu_sched_stat
nouveau_exec_job_timeout(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;

	if (unlikely(!atomic_read(&chan->killed)))
		nouveau_channel_kill(chan);

	NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
		  chan->chid);

	nouveau_sched_entity_fini(job->entity);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static struct nouveau_job_ops nouveau_exec_job_ops = {
	.submit = nouveau_exec_job_submit,
	.armed_submit = nouveau_exec_job_armed_submit,
	.run = nouveau_exec_job_run,
	.free = nouveau_exec_job_free,
	.timeout = nouveau_exec_job_timeout,
};

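/*
 * Allocate and initialize an EXEC job from the ioctl arguments: validate the
 * push buffer entries, copy them into the job and hook the job up to the
 * scheduler entity and the in/out sync objects.
 */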
int
nouveau_exec_job_init(struct nouveau_exec_job **pjob,
		      struct nouveau_exec_job_args *__args)
{
	struct nouveau_exec_job *job;
	struct nouveau_job_args args = {};
	int i, ret;

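	/*
	 * Reject push buffer entries exceeding the hardware's maximum IB entry
	 * length before allocating anything.
	 */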
	for (i = 0; i < __args->push.count; i++) {
		struct drm_nouveau_exec_push *p = &__args->push.s[i];

		if (unlikely(p->va_len > NV50_DMA_PUSH_MAX_LENGTH)) {
			NV_PRINTK(err, nouveau_cli(__args->file_priv),
				  "pushbuf size exceeds limit: 0x%x max 0x%x\n",
				  p->va_len, NV50_DMA_PUSH_MAX_LENGTH);
			return -EINVAL;
		}
	}

	job = *pjob = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job)
		return -ENOMEM;

	job->push.count = __args->push.count;
	if (__args->push.count) {
		job->push.s = kmemdup(__args->push.s,
				      sizeof(*__args->push.s) *
				      __args->push.count,
				      GFP_KERNEL);
		if (!job->push.s) {
			ret = -ENOMEM;
			goto err_free_job;
		}
	}

	job->chan = __args->chan;

	args.sched_entity = __args->sched_entity;
	args.file_priv = __args->file_priv;

	args.in_sync.count = __args->in_sync.count;
	args.in_sync.s = __args->in_sync.s;

	args.out_sync.count = __args->out_sync.count;
	args.out_sync.s = __args->out_sync.s;

	args.ops = &nouveau_exec_job_ops;
	args.resv_usage = DMA_RESV_USAGE_WRITE;

	ret = nouveau_job_init(&job->base, &args);
	if (ret)
		goto err_free_pushs;

	return 0;

err_free_pushs:
	kfree(job->push.s);
err_free_job:
	kfree(job);
	*pjob = NULL;

	return ret;
}

static int
nouveau_exec(struct nouveau_exec_job_args *args)
{
	struct nouveau_exec_job *job;
	int ret;

	ret = nouveau_exec_job_init(&job, args);
	if (ret)
		return ret;

	ret = nouveau_job_submit(&job->base);
	if (ret)
		goto err_job_fini;

	return 0;

err_job_fini:
	nouveau_job_fini(&job->base);
	return ret;
}

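/*
 * Copy the userspace arrays referenced by the ioctl request (push buffer
 * entries, wait and signal syncobjs) into kernel memory.
 */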
static int
nouveau_exec_ucopy(struct nouveau_exec_job_args *args,
		   struct drm_nouveau_exec *req)
{
	struct drm_nouveau_sync **s;
	u32 inc = req->wait_count;
	u64 ins = req->wait_ptr;
	u32 outc = req->sig_count;
	u64 outs = req->sig_ptr;
	u32 pushc = req->push_count;
	u64 pushs = req->push_ptr;
	int ret;

	if (pushc) {
		args->push.count = pushc;
		args->push.s = u_memcpya(pushs, pushc, sizeof(*args->push.s));
		if (IS_ERR(args->push.s))
			return PTR_ERR(args->push.s);
	}

	if (inc) {
		s = &args->in_sync.s;

		args->in_sync.count = inc;
		*s = u_memcpya(ins, inc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_pushs;
		}
	}

	if (outc) {
		s = &args->out_sync.s;

		args->out_sync.count = outc;
		*s = u_memcpya(outs, outc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_ins;
		}
	}

	return 0;

err_free_ins:
	u_free(args->in_sync.s);
err_free_pushs:
	u_free(args->push.s);
	return ret;
}

static void
nouveau_exec_ufree(struct nouveau_exec_job_args *args)
{
	u_free(args->push.s);
	u_free(args->in_sync.s);
	u_free(args->out_sync.s);
}

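/*
 * DRM_NOUVEAU_EXEC ioctl entry point: look up the target channel, perform
 * basic sanity checks, copy in the userspace arrays and submit the EXEC job.
 */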
int
nouveau_exec_ioctl_exec(struct drm_device *dev,
			void *data,
			struct drm_file *file_priv)
{
	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
	struct nouveau_cli *cli = nouveau_cli(file_priv);
	struct nouveau_abi16_chan *chan16;
	struct nouveau_channel *chan = NULL;
	struct nouveau_exec_job_args args = {};
	struct drm_nouveau_exec *req = data;
	int push_max, ret = 0;

	if (unlikely(!abi16))
		return -ENOMEM;

	/* abi16 locks already */
	if (unlikely(!nouveau_cli_uvmm(cli)))
		return nouveau_abi16_put(abi16, -ENOSYS);

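	/*
	 * Find the channel the request targets among the channels created by
	 * this client.
	 */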
	list_for_each_entry(chan16, &abi16->channels, head) {
		if (chan16->chan->chid == req->channel) {
			chan = chan16->chan;
			break;
		}
	}

	if (!chan)
		return nouveau_abi16_put(abi16, -ENOENT);

	if (unlikely(atomic_read(&chan->killed)))
		return nouveau_abi16_put(abi16, -ENODEV);

	if (!chan->dma.ib_max)
		return nouveau_abi16_put(abi16, -ENOSYS);

	push_max = nouveau_exec_push_max_from_ib_max(chan->dma.ib_max);
	if (unlikely(req->push_count > push_max)) {
		NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n",
			  req->push_count, push_max);
		return nouveau_abi16_put(abi16, -EINVAL);
	}

	ret = nouveau_exec_ucopy(&args, req);
	if (ret)
		goto out;

	args.sched_entity = &chan16->sched_entity;
	args.file_priv = file_priv;
	args.chan = chan;

	ret = nouveau_exec(&args);
	if (ret)
		goto out_free_args;

out_free_args:
	nouveau_exec_ufree(&args);
out:
	return nouveau_abi16_put(abi16, ret);
}