1 | /* |
2 | * Copyright © 2014 Broadcom |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice (including the next |
12 | * paragraph) shall be included in all copies or substantial portions of the |
13 | * Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
21 | * IN THE SOFTWARE. |
22 | */ |
23 | |
24 | #include <linux/module.h> |
25 | #include <linux/platform_device.h> |
26 | #include <linux/pm_runtime.h> |
27 | #include <linux/device.h> |
28 | #include <linux/io.h> |
29 | #include <linux/sched/signal.h> |
30 | #include <linux/dma-fence-array.h> |
31 | |
32 | #include <drm/drm_syncobj.h> |
33 | |
34 | #include "uapi/drm/vc4_drm.h" |
35 | #include "vc4_drv.h" |
36 | #include "vc4_regs.h" |
37 | #include "vc4_trace.h" |
38 | |
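/* Re-arm the hangcheck timer so that GPU progress is checked again
 * roughly 100ms from now.
 */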
39 | static void |
40 | vc4_queue_hangcheck(struct drm_device *dev) |
41 | { |
42 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
43 | |
	mod_timer(&vc4->hangcheck.timer,
		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
46 | } |
47 | |
48 | struct vc4_hang_state { |
49 | struct drm_vc4_get_hang_state user_state; |
50 | |
51 | u32 bo_count; |
52 | struct drm_gem_object **bo; |
53 | }; |
54 | |
55 | static void |
56 | vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state) |
57 | { |
58 | unsigned int i; |
59 | |
60 | for (i = 0; i < state->user_state.bo_count; i++) |
		drm_gem_object_put(state->bo[i]);

	kfree(state);
64 | } |
65 | |
66 | int |
67 | vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, |
68 | struct drm_file *file_priv) |
69 | { |
70 | struct drm_vc4_get_hang_state *get_state = data; |
71 | struct drm_vc4_get_hang_state_bo *bo_state; |
72 | struct vc4_hang_state *kernel_state; |
73 | struct drm_vc4_get_hang_state *state; |
74 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
75 | unsigned long irqflags; |
76 | u32 i; |
77 | int ret = 0; |
78 | |
79 | if (WARN_ON_ONCE(vc4->is_vc5)) |
80 | return -ENODEV; |
81 | |
82 | if (!vc4->v3d) { |
		DRM_DEBUG("VC4_GET_HANG_STATE with no VC4 V3D probed\n");
84 | return -ENODEV; |
85 | } |
86 | |
87 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
88 | kernel_state = vc4->hang_state; |
89 | if (!kernel_state) { |
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
91 | return -ENOENT; |
92 | } |
93 | state = &kernel_state->user_state; |
94 | |
95 | /* If the user's array isn't big enough, just return the |
96 | * required array size. |
97 | */ |
98 | if (get_state->bo_count < state->bo_count) { |
99 | get_state->bo_count = state->bo_count; |
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
101 | return 0; |
102 | } |
103 | |
104 | vc4->hang_state = NULL; |
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
106 | |
107 | /* Save the user's BO pointer, so we don't stomp it with the memcpy. */ |
108 | state->bo = get_state->bo; |
109 | memcpy(get_state, state, sizeof(*state)); |
110 | |
	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
112 | if (!bo_state) { |
113 | ret = -ENOMEM; |
114 | goto err_free; |
115 | } |
116 | |
117 | for (i = 0; i < state->bo_count; i++) { |
118 | struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]); |
119 | u32 handle; |
120 | |
		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
					    &handle);
123 | |
124 | if (ret) { |
125 | state->bo_count = i; |
126 | goto err_delete_handle; |
127 | } |
128 | bo_state[i].handle = handle; |
129 | bo_state[i].paddr = vc4_bo->base.dma_addr; |
130 | bo_state[i].size = vc4_bo->base.base.size; |
131 | } |
132 | |
133 | if (copy_to_user(u64_to_user_ptr(get_state->bo), |
			 bo_state,
			 state->bo_count * sizeof(*bo_state)))
136 | ret = -EFAULT; |
137 | |
138 | err_delete_handle: |
139 | if (ret) { |
140 | for (i = 0; i < state->bo_count; i++) |
			drm_gem_handle_delete(file_priv, bo_state[i].handle);
142 | } |
143 | |
144 | err_free: |
	vc4_free_hang_state(dev, kernel_state);
	kfree(bo_state);
147 | |
148 | return ret; |
149 | } |
150 | |
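/* Snapshot the V3D register state and take references on the BOs of the
 * currently running bin and render jobs, so that userspace can retrieve
 * them later through the VC4_GET_HANG_STATE ioctl.
 */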
151 | static void |
152 | vc4_save_hang_state(struct drm_device *dev) |
153 | { |
154 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
155 | struct drm_vc4_get_hang_state *state; |
156 | struct vc4_hang_state *kernel_state; |
157 | struct vc4_exec_info *exec[2]; |
158 | struct vc4_bo *bo; |
159 | unsigned long irqflags; |
160 | unsigned int i, j, k, unref_list_count; |
161 | |
	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
163 | if (!kernel_state) |
164 | return; |
165 | |
166 | state = &kernel_state->user_state; |
167 | |
168 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
169 | exec[0] = vc4_first_bin_job(vc4); |
170 | exec[1] = vc4_first_render_job(vc4); |
171 | if (!exec[0] && !exec[1]) { |
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
173 | return; |
174 | } |
175 | |
176 | /* Get the bos from both binner and renderer into hang state. */ |
177 | state->bo_count = 0; |
178 | for (i = 0; i < 2; i++) { |
179 | if (!exec[i]) |
180 | continue; |
181 | |
182 | unref_list_count = 0; |
183 | list_for_each_entry(bo, &exec[i]->unref_list, unref_head) |
184 | unref_list_count++; |
185 | state->bo_count += exec[i]->bo_count + unref_list_count; |
186 | } |
187 | |
	kernel_state->bo = kcalloc(state->bo_count,
				   sizeof(*kernel_state->bo), GFP_ATOMIC);
190 | |
191 | if (!kernel_state->bo) { |
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
193 | return; |
194 | } |
195 | |
196 | k = 0; |
197 | for (i = 0; i < 2; i++) { |
198 | if (!exec[i]) |
199 | continue; |
200 | |
201 | for (j = 0; j < exec[i]->bo_count; j++) { |
202 | bo = to_vc4_bo(exec[i]->bo[j]); |
203 | |
204 | /* Retain BOs just in case they were marked purgeable. |
205 | * This prevents the BO from being purged before |
206 | * someone had a chance to dump the hang state. |
207 | */ |
208 | WARN_ON(!refcount_read(&bo->usecnt)); |
			refcount_inc(&bo->usecnt);
			drm_gem_object_get(exec[i]->bo[j]);
211 | kernel_state->bo[k++] = exec[i]->bo[j]; |
212 | } |
213 | |
214 | list_for_each_entry(bo, &exec[i]->unref_list, unref_head) { |
215 | /* No need to retain BOs coming from the ->unref_list |
216 | * because they are naturally unpurgeable. |
217 | */ |
			drm_gem_object_get(&bo->base.base);
219 | kernel_state->bo[k++] = &bo->base.base; |
220 | } |
221 | } |
222 | |
223 | WARN_ON_ONCE(k != state->bo_count); |
224 | |
225 | if (exec[0]) |
226 | state->start_bin = exec[0]->ct0ca; |
227 | if (exec[1]) |
228 | state->start_render = exec[1]->ct1ca; |
229 | |
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
231 | |
232 | state->ct0ca = V3D_READ(V3D_CTNCA(0)); |
233 | state->ct0ea = V3D_READ(V3D_CTNEA(0)); |
234 | |
235 | state->ct1ca = V3D_READ(V3D_CTNCA(1)); |
236 | state->ct1ea = V3D_READ(V3D_CTNEA(1)); |
237 | |
238 | state->ct0cs = V3D_READ(V3D_CTNCS(0)); |
239 | state->ct1cs = V3D_READ(V3D_CTNCS(1)); |
240 | |
241 | state->ct0ra0 = V3D_READ(V3D_CT00RA0); |
242 | state->ct1ra0 = V3D_READ(V3D_CT01RA0); |
243 | |
244 | state->bpca = V3D_READ(V3D_BPCA); |
245 | state->bpcs = V3D_READ(V3D_BPCS); |
246 | state->bpoa = V3D_READ(V3D_BPOA); |
247 | state->bpos = V3D_READ(V3D_BPOS); |
248 | |
249 | state->vpmbase = V3D_READ(V3D_VPMBASE); |
250 | |
251 | state->dbge = V3D_READ(V3D_DBGE); |
252 | state->fdbgo = V3D_READ(V3D_FDBGO); |
253 | state->fdbgb = V3D_READ(V3D_FDBGB); |
254 | state->fdbgr = V3D_READ(V3D_FDBGR); |
255 | state->fdbgs = V3D_READ(V3D_FDBGS); |
256 | state->errstat = V3D_READ(V3D_ERRSTAT); |
257 | |
258 | /* We need to turn purgeable BOs into unpurgeable ones so that |
259 | * userspace has a chance to dump the hang state before the kernel |
260 | * decides to purge those BOs. |
261 | * Note that BO consistency at dump time cannot be guaranteed. For |
262 | * example, if the owner of these BOs decides to re-use them or mark |
263 | * them purgeable again there's nothing we can do to prevent it. |
264 | */ |
265 | for (i = 0; i < kernel_state->user_state.bo_count; i++) { |
266 | struct vc4_bo *bo = to_vc4_bo(kernel_state->bo[i]); |
267 | |
268 | if (bo->madv == __VC4_MADV_NOTSUPP) |
269 | continue; |
270 | |
271 | mutex_lock(&bo->madv_lock); |
272 | if (!WARN_ON(bo->madv == __VC4_MADV_PURGED)) |
273 | bo->madv = VC4_MADV_WILLNEED; |
		refcount_dec(&bo->usecnt);
		mutex_unlock(&bo->madv_lock);
276 | } |
277 | |
278 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
279 | if (vc4->hang_state) { |
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_free_hang_state(dev, kernel_state);
282 | } else { |
283 | vc4->hang_state = kernel_state; |
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
285 | } |
286 | } |
287 | |
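/* Perform a full GPU reset by power cycling the V3D block through runtime
 * PM, then reset the interrupt state and re-arm the hangcheck timer.
 */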
288 | static void |
289 | vc4_reset(struct drm_device *dev) |
290 | { |
291 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
292 | |
	DRM_INFO("Resetting GPU.\n");
294 | |
295 | mutex_lock(&vc4->power_lock); |
296 | if (vc4->power_refcount) { |
		/* Power the device off and back on by dropping the
		 * reference on runtime PM.
		 */
		pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
		pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);
304 | |
305 | vc4_irq_reset(dev); |
306 | |
307 | /* Rearm the hangcheck -- another job might have been waiting |
308 | * for our hung one to get kicked off, and vc4_irq_reset() |
309 | * would have started it. |
310 | */ |
311 | vc4_queue_hangcheck(dev); |
312 | } |
313 | |
314 | static void |
315 | vc4_reset_work(struct work_struct *work) |
316 | { |
317 | struct vc4_dev *vc4 = |
318 | container_of(work, struct vc4_dev, hangcheck.reset_work); |
319 | |
	vc4_save_hang_state(&vc4->base);

	vc4_reset(&vc4->base);
323 | } |
324 | |
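/* Timer callback for the hangcheck: if neither control list has advanced
 * since the last check, assume the GPU is hung and schedule the reset
 * worker (resetting can sleep, so it cannot be done from timer context).
 */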
325 | static void |
326 | vc4_hangcheck_elapsed(struct timer_list *t) |
327 | { |
328 | struct vc4_dev *vc4 = from_timer(vc4, t, hangcheck.timer); |
329 | struct drm_device *dev = &vc4->base; |
330 | uint32_t ct0ca, ct1ca; |
331 | unsigned long irqflags; |
332 | struct vc4_exec_info *bin_exec, *render_exec; |
333 | |
334 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
335 | |
336 | bin_exec = vc4_first_bin_job(vc4); |
337 | render_exec = vc4_first_render_job(vc4); |
338 | |
339 | /* If idle, we can stop watching for hangs. */ |
340 | if (!bin_exec && !render_exec) { |
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
342 | return; |
343 | } |
344 | |
345 | ct0ca = V3D_READ(V3D_CTNCA(0)); |
346 | ct1ca = V3D_READ(V3D_CTNCA(1)); |
347 | |
348 | /* If we've made any progress in execution, rearm the timer |
349 | * and wait. |
350 | */ |
351 | if ((bin_exec && ct0ca != bin_exec->last_ct0ca) || |
352 | (render_exec && ct1ca != render_exec->last_ct1ca)) { |
353 | if (bin_exec) |
354 | bin_exec->last_ct0ca = ct0ca; |
355 | if (render_exec) |
356 | render_exec->last_ct1ca = ct1ca; |
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
358 | vc4_queue_hangcheck(dev); |
359 | return; |
360 | } |
361 | |
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
363 | |
364 | /* We've gone too long with no progress, reset. This has to |
365 | * be done from a work struct, since resetting can sleep and |
366 | * this timer hook isn't allowed to. |
367 | */ |
	schedule_work(&vc4->hangcheck.reset_work);
369 | } |
370 | |
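/* Kicks off a control list on one of the two V3D threads (0 = binner,
 * 1 = renderer).
 */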
371 | static void |
372 | submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end) |
373 | { |
374 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
375 | |
376 | /* Set the current and end address of the control list. |
377 | * Writing the end register is what starts the job. |
378 | */ |
379 | V3D_WRITE(V3D_CTNCA(thread), start); |
380 | V3D_WRITE(V3D_CTNEA(thread), end); |
381 | } |
382 | |
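/* Waits for the given seqno to be signaled as finished, up to timeout_ns
 * nanoseconds (~0ull means wait forever).  Returns 0 on success, -ETIME on
 * timeout, or -ERESTARTSYS if an interruptible wait was interrupted.
 */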
383 | int |
384 | vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns, |
385 | bool interruptible) |
386 | { |
387 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
388 | int ret = 0; |
389 | unsigned long timeout_expire; |
390 | DEFINE_WAIT(wait); |
391 | |
392 | if (WARN_ON_ONCE(vc4->is_vc5)) |
393 | return -ENODEV; |
394 | |
395 | if (vc4->finished_seqno >= seqno) |
396 | return 0; |
397 | |
398 | if (timeout_ns == 0) |
399 | return -ETIME; |
400 | |
	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);

	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
	for (;;) {
		prepare_to_wait(&vc4->job_wait_queue, &wait,
				interruptible ? TASK_INTERRUPTIBLE :
				TASK_UNINTERRUPTIBLE);
408 | |
409 | if (interruptible && signal_pending(current)) { |
410 | ret = -ERESTARTSYS; |
411 | break; |
412 | } |
413 | |
414 | if (vc4->finished_seqno >= seqno) |
415 | break; |
416 | |
417 | if (timeout_ns != ~0ull) { |
418 | if (time_after_eq(jiffies, timeout_expire)) { |
419 | ret = -ETIME; |
420 | break; |
421 | } |
			schedule_timeout(timeout_expire - jiffies);
423 | } else { |
424 | schedule(); |
425 | } |
426 | } |
427 | |
	finish_wait(&vc4->job_wait_queue, &wait);
	trace_vc4_wait_for_seqno_end(dev, seqno);
430 | |
431 | return ret; |
432 | } |
433 | |
434 | static void |
435 | vc4_flush_caches(struct drm_device *dev) |
436 | { |
437 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
438 | |
439 | /* Flush the GPU L2 caches. These caches sit on top of system |
440 | * L3 (the 128kb or so shared with the CPU), and are |
441 | * non-allocating in the L3. |
442 | */ |
443 | V3D_WRITE(V3D_L2CACTL, |
444 | V3D_L2CACTL_L2CCLR); |
445 | |
446 | V3D_WRITE(V3D_SLCACTL, |
447 | VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) | |
448 | VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) | |
449 | VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) | |
450 | VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC)); |
451 | } |
452 | |
453 | static void |
454 | vc4_flush_texture_caches(struct drm_device *dev) |
455 | { |
456 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
457 | |
458 | V3D_WRITE(V3D_L2CACTL, |
459 | V3D_L2CACTL_L2CCLR); |
460 | |
461 | V3D_WRITE(V3D_SLCACTL, |
462 | VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) | |
463 | VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC)); |
464 | } |
465 | |
/* Sets the registers for the next job to actually be executed in
467 | * the hardware. |
468 | * |
469 | * The job_lock should be held during this. |
470 | */ |
471 | void |
472 | vc4_submit_next_bin_job(struct drm_device *dev) |
473 | { |
474 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
475 | struct vc4_exec_info *exec; |
476 | |
477 | if (WARN_ON_ONCE(vc4->is_vc5)) |
478 | return; |
479 | |
480 | again: |
481 | exec = vc4_first_bin_job(vc4); |
482 | if (!exec) |
483 | return; |
484 | |
485 | vc4_flush_caches(dev); |
486 | |
487 | /* Only start the perfmon if it was not already started by a previous |
488 | * job. |
489 | */ |
490 | if (exec->perfmon && vc4->active_perfmon != exec->perfmon) |
		vc4_perfmon_start(vc4, exec->perfmon);
492 | |
493 | /* Either put the job in the binner if it uses the binner, or |
494 | * immediately move it to the to-be-rendered queue. |
495 | */ |
496 | if (exec->ct0ca != exec->ct0ea) { |
		trace_vc4_submit_cl(dev, false, exec->seqno, exec->ct0ca,
				    exec->ct0ea);
		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
500 | } else { |
501 | struct vc4_exec_info *next; |
502 | |
503 | vc4_move_job_to_render(dev, exec); |
504 | next = vc4_first_bin_job(vc4); |
505 | |
506 | /* We can't start the next bin job if the previous job had a |
507 | * different perfmon instance attached to it. The same goes |
508 | * if one of them had a perfmon attached to it and the other |
509 | * one doesn't. |
510 | */ |
511 | if (next && next->perfmon == exec->perfmon) |
512 | goto again; |
513 | } |
514 | } |
515 | |
516 | void |
517 | vc4_submit_next_render_job(struct drm_device *dev) |
518 | { |
519 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
520 | struct vc4_exec_info *exec = vc4_first_render_job(vc4); |
521 | |
522 | if (!exec) |
523 | return; |
524 | |
525 | if (WARN_ON_ONCE(vc4->is_vc5)) |
526 | return; |
527 | |
528 | /* A previous RCL may have written to one of our textures, and |
529 | * our full cache flush at bin time may have occurred before |
530 | * that RCL completed. Flush the texture cache now, but not |
531 | * the instructions or uniforms (since we don't write those |
532 | * from an RCL). |
533 | */ |
534 | vc4_flush_texture_caches(dev); |
535 | |
	trace_vc4_submit_cl(dev, true, exec->seqno, exec->ct1ca, exec->ct1ea);
	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
538 | } |
539 | |
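/* Moves a job from the binner queue to the render queue, and kicks the
 * render engine if it was idle.  Called with job_lock held.
 */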
540 | void |
541 | vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec) |
542 | { |
543 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
	bool was_empty = list_empty(&vc4->render_job_list);
545 | |
546 | if (WARN_ON_ONCE(vc4->is_vc5)) |
547 | return; |
548 | |
	list_move_tail(&exec->head, &vc4->render_job_list);
550 | if (was_empty) |
551 | vc4_submit_next_render_job(dev); |
552 | } |
553 | |
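/* Attaches the job's fence to the reservation object of every BO it
 * references (as a read fence, or as a write fence for RCL write BOs) and
 * records the job's seqno in each BO for the wait-BO ioctl.
 */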
554 | static void |
555 | vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) |
556 | { |
557 | struct vc4_bo *bo; |
558 | unsigned i; |
559 | |
560 | for (i = 0; i < exec->bo_count; i++) { |
561 | bo = to_vc4_bo(exec->bo[i]); |
562 | bo->seqno = seqno; |
563 | |
		dma_resv_add_fence(bo->base.base.resv, exec->fence,
				   DMA_RESV_USAGE_READ);
566 | } |
567 | |
568 | list_for_each_entry(bo, &exec->unref_list, unref_head) { |
569 | bo->seqno = seqno; |
570 | } |
571 | |
572 | for (i = 0; i < exec->rcl_write_bo_count; i++) { |
573 | bo = to_vc4_bo(&exec->rcl_write_bo[i]->base); |
574 | bo->write_seqno = seqno; |
575 | |
		dma_resv_add_fence(bo->base.base.resv, exec->fence,
				   DMA_RESV_USAGE_WRITE);
578 | } |
579 | } |
580 | |
581 | static void |
582 | vc4_unlock_bo_reservations(struct drm_device *dev, |
583 | struct vc4_exec_info *exec, |
584 | struct ww_acquire_ctx *acquire_ctx) |
585 | { |
586 | int i; |
587 | |
588 | for (i = 0; i < exec->bo_count; i++) |
		dma_resv_unlock(exec->bo[i]->resv);

	ww_acquire_fini(acquire_ctx);
592 | } |
593 | |
594 | /* Takes the reservation lock on all the BOs being referenced, so that |
595 | * at queue submit time we can update the reservations. |
596 | * |
 * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
598 | * (all of which are on exec->unref_list). They're entirely private |
599 | * to vc4, so we don't attach dma-buf fences to them. |
600 | */ |
601 | static int |
602 | vc4_lock_bo_reservations(struct drm_device *dev, |
603 | struct vc4_exec_info *exec, |
604 | struct ww_acquire_ctx *acquire_ctx) |
605 | { |
606 | int contended_lock = -1; |
607 | int i, ret; |
608 | struct drm_gem_object *bo; |
609 | |
	ww_acquire_init(acquire_ctx, &reservation_ww_class);
611 | |
612 | retry: |
613 | if (contended_lock != -1) { |
614 | bo = exec->bo[contended_lock]; |
		ret = dma_resv_lock_slow_interruptible(bo->resv, acquire_ctx);
616 | if (ret) { |
			ww_acquire_done(acquire_ctx);
618 | return ret; |
619 | } |
620 | } |
621 | |
622 | for (i = 0; i < exec->bo_count; i++) { |
623 | if (i == contended_lock) |
624 | continue; |
625 | |
626 | bo = exec->bo[i]; |
627 | |
		ret = dma_resv_lock_interruptible(bo->resv, acquire_ctx);
629 | if (ret) { |
630 | int j; |
631 | |
632 | for (j = 0; j < i; j++) { |
633 | bo = exec->bo[j]; |
				dma_resv_unlock(bo->resv);
635 | } |
636 | |
637 | if (contended_lock != -1 && contended_lock >= i) { |
638 | bo = exec->bo[contended_lock]; |
639 | |
				dma_resv_unlock(bo->resv);
641 | } |
642 | |
643 | if (ret == -EDEADLK) { |
644 | contended_lock = i; |
645 | goto retry; |
646 | } |
647 | |
			ww_acquire_done(acquire_ctx);
649 | return ret; |
650 | } |
651 | } |
652 | |
	ww_acquire_done(acquire_ctx);
654 | |
655 | /* Reserve space for our shared (read-only) fence references, |
656 | * before we commit the CL to the hardware. |
657 | */ |
658 | for (i = 0; i < exec->bo_count; i++) { |
659 | bo = exec->bo[i]; |
660 | |
		ret = dma_resv_reserve_fences(bo->resv, 1);
662 | if (ret) { |
663 | vc4_unlock_bo_reservations(dev, exec, acquire_ctx); |
664 | return ret; |
665 | } |
666 | } |
667 | |
668 | return 0; |
669 | } |
670 | |
671 | /* Queues a struct vc4_exec_info for execution. If no job is |
672 | * currently executing, then submits it. |
673 | * |
674 | * Unlike most GPUs, our hardware only handles one command list at a |
675 | * time. To queue multiple jobs at once, we'd need to edit the |
676 | * previous command list to have a jump to the new one at the end, and |
677 | * then bump the end address. That's a change for a later date, |
678 | * though. |
679 | */ |
680 | static int |
681 | vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, |
682 | struct ww_acquire_ctx *acquire_ctx, |
683 | struct drm_syncobj *out_sync) |
684 | { |
685 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
686 | struct vc4_exec_info *renderjob; |
687 | uint64_t seqno; |
688 | unsigned long irqflags; |
689 | struct vc4_fence *fence; |
690 | |
	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
692 | if (!fence) |
693 | return -ENOMEM; |
694 | fence->dev = dev; |
695 | |
696 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
697 | |
698 | seqno = ++vc4->emit_seqno; |
699 | exec->seqno = seqno; |
700 | |
	dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock,
		       vc4->dma_fence_context, exec->seqno);
703 | fence->seqno = exec->seqno; |
704 | exec->fence = &fence->base; |
705 | |
706 | if (out_sync) |
		drm_syncobj_replace_fence(out_sync, exec->fence);
708 | |
709 | vc4_update_bo_seqnos(exec, seqno); |
710 | |
711 | vc4_unlock_bo_reservations(dev, exec, acquire_ctx); |
712 | |
	list_add_tail(&exec->head, &vc4->bin_job_list);
714 | |
715 | /* If no bin job was executing and if the render job (if any) has the |
716 | * same perfmon as our job attached to it (or if both jobs don't have |
717 | * perfmon activated), then kick ours off. Otherwise, it'll get |
718 | * started when the previous job's flush/render done interrupt occurs. |
719 | */ |
720 | renderjob = vc4_first_render_job(vc4); |
721 | if (vc4_first_bin_job(vc4) == exec && |
722 | (!renderjob || renderjob->perfmon == exec->perfmon)) { |
723 | vc4_submit_next_bin_job(dev); |
724 | vc4_queue_hangcheck(dev); |
725 | } |
726 | |
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
728 | |
729 | return 0; |
730 | } |
731 | |
732 | /** |
733 | * vc4_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects |
734 | * referenced by the job. |
735 | * @dev: DRM device |
736 | * @file_priv: DRM file for this fd |
737 | * @exec: V3D job being set up |
738 | * |
739 | * The command validator needs to reference BOs by their index within |
740 | * the submitted job's BO list. This does the validation of the job's |
741 | * BO list and reference counting for the lifetime of the job. |
742 | */ |
743 | static int |
744 | vc4_cl_lookup_bos(struct drm_device *dev, |
745 | struct drm_file *file_priv, |
746 | struct vc4_exec_info *exec) |
747 | { |
748 | struct drm_vc4_submit_cl *args = exec->args; |
749 | int ret = 0; |
750 | int i; |
751 | |
752 | exec->bo_count = args->bo_handle_count; |
753 | |
754 | if (!exec->bo_count) { |
755 | /* See comment on bo_index for why we have to check |
756 | * this. |
757 | */ |
		DRM_DEBUG("Rendering requires BOs to validate\n");
759 | return -EINVAL; |
760 | } |
761 | |
	ret = drm_gem_objects_lookup(file_priv, u64_to_user_ptr(args->bo_handles),
				     exec->bo_count, &exec->bo);
764 | |
765 | if (ret) |
766 | goto fail_put_bo; |
767 | |
768 | for (i = 0; i < exec->bo_count; i++) { |
769 | ret = vc4_bo_inc_usecnt(to_vc4_bo(exec->bo[i])); |
770 | if (ret) |
771 | goto fail_dec_usecnt; |
772 | } |
773 | |
774 | return 0; |
775 | |
776 | fail_dec_usecnt: |
777 | /* Decrease usecnt on acquired objects. |
778 | * We cannot rely on vc4_complete_exec() to release resources here, |
779 | * because vc4_complete_exec() has no information about which BO has |
780 | * had its ->usecnt incremented. |
781 | * To make things easier we just free everything explicitly and set |
782 | * exec->bo to NULL so that vc4_complete_exec() skips the 'BO release' |
783 | * step. |
784 | */ |
785 | for (i-- ; i >= 0; i--) |
786 | vc4_bo_dec_usecnt(to_vc4_bo(exec->bo[i])); |
787 | |
788 | fail_put_bo: |
789 | /* Release any reference to acquired objects. */ |
790 | for (i = 0; i < exec->bo_count && exec->bo[i]; i++) |
		drm_gem_object_put(exec->bo[i]);

	kvfree(exec->bo);
794 | exec->bo = NULL; |
795 | return ret; |
796 | } |
797 | |
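/* Copies in the binner command list, shader records and uniforms from
 * userspace, validates them, and stores the validated copies in a newly
 * allocated BO that the hardware will actually execute from.
 */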
798 | static int |
799 | vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) |
800 | { |
801 | struct drm_vc4_submit_cl *args = exec->args; |
802 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
803 | void *temp = NULL; |
804 | void *bin; |
805 | int ret = 0; |
806 | uint32_t bin_offset = 0; |
807 | uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size, |
808 | 16); |
809 | uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size; |
810 | uint32_t exec_size = uniforms_offset + args->uniforms_size; |
811 | uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) * |
812 | args->shader_rec_count); |
813 | struct vc4_bo *bo; |
814 | |
815 | if (shader_rec_offset < args->bin_cl_size || |
816 | uniforms_offset < shader_rec_offset || |
817 | exec_size < uniforms_offset || |
818 | args->shader_rec_count >= (UINT_MAX / |
819 | sizeof(struct vc4_shader_state)) || |
820 | temp_size < exec_size) { |
		DRM_DEBUG("overflow in exec arguments\n");
822 | ret = -EINVAL; |
823 | goto fail; |
824 | } |
825 | |
826 | /* Allocate space where we'll store the copied in user command lists |
827 | * and shader records. |
828 | * |
829 | * We don't just copy directly into the BOs because we need to |
830 | * read the contents back for validation, and I think the |
831 | * bo->vaddr is uncached access. |
832 | */ |
	temp = kvmalloc_array(temp_size, 1, GFP_KERNEL);
	if (!temp) {
		DRM_ERROR("Failed to allocate storage for copying "
			  "in bin/render CLs.\n");
837 | ret = -ENOMEM; |
838 | goto fail; |
839 | } |
840 | bin = temp + bin_offset; |
841 | exec->shader_rec_u = temp + shader_rec_offset; |
842 | exec->uniforms_u = temp + uniforms_offset; |
843 | exec->shader_state = temp + exec_size; |
844 | exec->shader_state_size = args->shader_rec_count; |
845 | |
	if (copy_from_user(bin,
			   u64_to_user_ptr(args->bin_cl),
			   args->bin_cl_size)) {
849 | ret = -EFAULT; |
850 | goto fail; |
851 | } |
852 | |
	if (copy_from_user(exec->shader_rec_u,
			   u64_to_user_ptr(args->shader_rec),
			   args->shader_rec_size)) {
856 | ret = -EFAULT; |
857 | goto fail; |
858 | } |
859 | |
	if (copy_from_user(exec->uniforms_u,
			   u64_to_user_ptr(args->uniforms),
			   args->uniforms_size)) {
863 | ret = -EFAULT; |
864 | goto fail; |
865 | } |
866 | |
	bo = vc4_bo_create(dev, exec_size, true, VC4_BO_TYPE_BCL);
	if (IS_ERR(bo)) {
		DRM_ERROR("Couldn't allocate BO for binning\n");
		ret = PTR_ERR(bo);
871 | goto fail; |
872 | } |
873 | exec->exec_bo = &bo->base; |
874 | |
	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
		      &exec->unref_list);
877 | |
878 | exec->ct0ca = exec->exec_bo->dma_addr + bin_offset; |
879 | |
880 | exec->bin_u = bin; |
881 | |
882 | exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset; |
883 | exec->shader_rec_p = exec->exec_bo->dma_addr + shader_rec_offset; |
884 | exec->shader_rec_size = args->shader_rec_size; |
885 | |
886 | exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset; |
887 | exec->uniforms_p = exec->exec_bo->dma_addr + uniforms_offset; |
888 | exec->uniforms_size = args->uniforms_size; |
889 | |
890 | ret = vc4_validate_bin_cl(dev, |
				  exec->exec_bo->vaddr + bin_offset,
				  bin,
893 | exec); |
894 | if (ret) |
895 | goto fail; |
896 | |
897 | ret = vc4_validate_shader_recs(dev, exec); |
898 | if (ret) |
899 | goto fail; |
900 | |
901 | if (exec->found_tile_binning_mode_config_packet) { |
		ret = vc4_v3d_bin_bo_get(vc4, &exec->bin_bo_used);
903 | if (ret) |
904 | goto fail; |
905 | } |
906 | |
907 | /* Block waiting on any previous rendering into the CS's VBO, |
908 | * IB, or textures, so that pixels are actually written by the |
909 | * time we try to read them. |
910 | */ |
	ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);

fail:
	kvfree(temp);
915 | return ret; |
916 | } |
917 | |
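/* Tears down an exec once the job is finished (or force-completed on GPU
 * reset): signals its fence if still pending, drops BO references and use
 * counts, returns its bin slots and binner BO reference, and releases the
 * power and perfmon references taken at submit time.
 */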
918 | static void |
919 | vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) |
920 | { |
921 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
922 | unsigned long irqflags; |
923 | unsigned i; |
924 | |
925 | /* If we got force-completed because of GPU reset rather than |
926 | * through our IRQ handler, signal the fence now. |
927 | */ |
928 | if (exec->fence) { |
		dma_fence_signal(exec->fence);
		dma_fence_put(exec->fence);
931 | } |
932 | |
933 | if (exec->bo) { |
934 | for (i = 0; i < exec->bo_count; i++) { |
935 | struct vc4_bo *bo = to_vc4_bo(exec->bo[i]); |
936 | |
937 | vc4_bo_dec_usecnt(bo); |
			drm_gem_object_put(exec->bo[i]);
		}
		kvfree(exec->bo);
941 | } |
942 | |
	while (!list_empty(&exec->unref_list)) {
		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
						     struct vc4_bo, unref_head);
		list_del(&bo->unref_head);
		drm_gem_object_put(&bo->base.base);
948 | } |
949 | |
950 | /* Free up the allocation of any bin slots we used. */ |
951 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
952 | vc4->bin_alloc_used &= ~exec->bin_slots; |
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
954 | |
955 | /* Release the reference on the binner BO if needed. */ |
956 | if (exec->bin_bo_used) |
957 | vc4_v3d_bin_bo_put(vc4); |
958 | |
959 | /* Release the reference we had on the perf monitor. */ |
	vc4_perfmon_put(exec->perfmon);

	vc4_v3d_pm_put(vc4);

	kfree(exec);
965 | } |
966 | |
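/* Completes every exec on job_done_list and runs any registered seqno
 * callbacks whose seqno has now been reached.
 */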
967 | void |
968 | vc4_job_handle_completed(struct vc4_dev *vc4) |
969 | { |
970 | unsigned long irqflags; |
971 | struct vc4_seqno_cb *cb, *cb_temp; |
972 | |
973 | if (WARN_ON_ONCE(vc4->is_vc5)) |
974 | return; |
975 | |
976 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
	while (!list_empty(&vc4->job_done_list)) {
		struct vc4_exec_info *exec =
			list_first_entry(&vc4->job_done_list,
					 struct vc4_exec_info, head);
		list_del(&exec->head);

		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_complete_exec(&vc4->base, exec);
985 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
986 | } |
987 | |
988 | list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) { |
989 | if (cb->seqno <= vc4->finished_seqno) { |
			list_del_init(&cb->work.entry);
			schedule_work(&cb->work);
992 | } |
993 | } |
994 | |
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
996 | } |
997 | |
998 | static void vc4_seqno_cb_work(struct work_struct *work) |
999 | { |
1000 | struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work); |
1001 | |
1002 | cb->func(cb); |
1003 | } |
1004 | |
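/* Registers a callback to be run (from a workqueue) once the given seqno
 * has finished; if it already has, the callback is scheduled immediately.
 */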
1005 | int vc4_queue_seqno_cb(struct drm_device *dev, |
1006 | struct vc4_seqno_cb *cb, uint64_t seqno, |
1007 | void (*func)(struct vc4_seqno_cb *cb)) |
1008 | { |
1009 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
1010 | unsigned long irqflags; |
1011 | |
1012 | if (WARN_ON_ONCE(vc4->is_vc5)) |
1013 | return -ENODEV; |
1014 | |
1015 | cb->func = func; |
1016 | INIT_WORK(&cb->work, vc4_seqno_cb_work); |
1017 | |
1018 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
1019 | if (seqno > vc4->finished_seqno) { |
1020 | cb->seqno = seqno; |
		list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
	} else {
		schedule_work(&cb->work);
	}
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
1026 | |
1027 | return 0; |
1028 | } |
1029 | |
1030 | /* Scheduled when any job has been completed, this walks the list of |
1031 | * jobs that had completed and unrefs their BOs and frees their exec |
1032 | * structs. |
1033 | */ |
1034 | static void |
1035 | vc4_job_done_work(struct work_struct *work) |
1036 | { |
1037 | struct vc4_dev *vc4 = |
1038 | container_of(work, struct vc4_dev, job_done_work); |
1039 | |
1040 | vc4_job_handle_completed(vc4); |
1041 | } |
1042 | |
1043 | static int |
1044 | vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev, |
1045 | uint64_t seqno, |
1046 | uint64_t *timeout_ns) |
1047 | { |
1048 | unsigned long start = jiffies; |
	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);
1050 | |
1051 | if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) { |
		uint64_t delta = jiffies_to_nsecs(jiffies - start);
1053 | |
1054 | if (*timeout_ns >= delta) |
1055 | *timeout_ns -= delta; |
1056 | } |
1057 | |
1058 | return ret; |
1059 | } |
1060 | |
1061 | int |
1062 | vc4_wait_seqno_ioctl(struct drm_device *dev, void *data, |
1063 | struct drm_file *file_priv) |
1064 | { |
1065 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
1066 | struct drm_vc4_wait_seqno *args = data; |
1067 | |
1068 | if (WARN_ON_ONCE(vc4->is_vc5)) |
1069 | return -ENODEV; |
1070 | |
	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
					       &args->timeout_ns);
1073 | } |
1074 | |
1075 | int |
1076 | vc4_wait_bo_ioctl(struct drm_device *dev, void *data, |
1077 | struct drm_file *file_priv) |
1078 | { |
1079 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
1080 | int ret; |
1081 | struct drm_vc4_wait_bo *args = data; |
1082 | struct drm_gem_object *gem_obj; |
1083 | struct vc4_bo *bo; |
1084 | |
1085 | if (WARN_ON_ONCE(vc4->is_vc5)) |
1086 | return -ENODEV; |
1087 | |
1088 | if (args->pad != 0) |
1089 | return -EINVAL; |
1090 | |
	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
1094 | return -EINVAL; |
1095 | } |
1096 | bo = to_vc4_bo(gem_obj); |
1097 | |
	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
					      &args->timeout_ns);

	drm_gem_object_put(gem_obj);
1102 | return ret; |
1103 | } |
1104 | |
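/* A rough, illustrative sketch of the userspace side of this ioctl (not
 * part of the driver; the local variable names are made up, error handling
 * and the render-target description fields are omitted, and the ioctl is
 * issued here e.g. through libdrm's drmIoctl()):
 *
 *	struct drm_vc4_submit_cl submit = { 0 };
 *
 *	submit.bin_cl = (uintptr_t)bin_cl;
 *	submit.bin_cl_size = bin_cl_size;
 *	submit.shader_rec = (uintptr_t)shader_recs;
 *	submit.shader_rec_size = shader_rec_size;
 *	submit.shader_rec_count = shader_rec_count;
 *	submit.uniforms = (uintptr_t)uniforms;
 *	submit.uniforms_size = uniforms_size;
 *	submit.bo_handles = (uintptr_t)bo_handles;
 *	submit.bo_handle_count = bo_handle_count;
 *
 *	drmIoctl(fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
 *
 * On success, submit.seqno can then be passed to DRM_IOCTL_VC4_WAIT_SEQNO
 * to wait for the frame to complete.
 */
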
1105 | /** |
1106 | * vc4_submit_cl_ioctl() - Submits a job (frame) to the VC4. |
1107 | * @dev: DRM device |
1108 | * @data: ioctl argument |
1109 | * @file_priv: DRM file for this fd |
1110 | * |
1111 | * This is the main entrypoint for userspace to submit a 3D frame to |
1112 | * the GPU. Userspace provides the binner command list (if |
1113 | * applicable), and the kernel sets up the render command list to draw |
1114 | * to the framebuffer described in the ioctl, using the command lists |
1115 | * that the 3D engine's binner will produce. |
1116 | */ |
1117 | int |
1118 | vc4_submit_cl_ioctl(struct drm_device *dev, void *data, |
1119 | struct drm_file *file_priv) |
1120 | { |
1121 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
1122 | struct vc4_file *vc4file = file_priv->driver_priv; |
1123 | struct drm_vc4_submit_cl *args = data; |
1124 | struct drm_syncobj *out_sync = NULL; |
1125 | struct vc4_exec_info *exec; |
1126 | struct ww_acquire_ctx acquire_ctx; |
1127 | struct dma_fence *in_fence; |
1128 | int ret = 0; |
1129 | |
	trace_vc4_submit_cl_ioctl(dev, args->bin_cl_size,
				  args->shader_rec_size,
				  args->bo_handle_count);
1133 | |
1134 | if (WARN_ON_ONCE(vc4->is_vc5)) |
1135 | return -ENODEV; |
1136 | |
1137 | if (!vc4->v3d) { |
		DRM_DEBUG("VC4_SUBMIT_CL with no VC4 V3D probed\n");
1139 | return -ENODEV; |
1140 | } |
1141 | |
1142 | if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR | |
1143 | VC4_SUBMIT_CL_FIXED_RCL_ORDER | |
1144 | VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X | |
1145 | VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) { |
		DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
1147 | return -EINVAL; |
1148 | } |
1149 | |
1150 | if (args->pad2 != 0) { |
		DRM_DEBUG("Invalid pad: 0x%08x\n", args->pad2);
1152 | return -EINVAL; |
1153 | } |
1154 | |
	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
	if (!exec) {
		DRM_ERROR("malloc failure on exec struct\n");
1158 | return -ENOMEM; |
1159 | } |
1160 | exec->dev = vc4; |
1161 | |
1162 | ret = vc4_v3d_pm_get(vc4); |
1163 | if (ret) { |
		kfree(exec);
1165 | return ret; |
1166 | } |
1167 | |
1168 | exec->args = args; |
	INIT_LIST_HEAD(&exec->unref_list);
1170 | |
1171 | ret = vc4_cl_lookup_bos(dev, file_priv, exec); |
1172 | if (ret) |
1173 | goto fail; |
1174 | |
1175 | if (args->perfmonid) { |
		exec->perfmon = vc4_perfmon_find(vc4file,
						 args->perfmonid);
1178 | if (!exec->perfmon) { |
1179 | ret = -ENOENT; |
1180 | goto fail; |
1181 | } |
1182 | } |
1183 | |
1184 | if (args->in_sync) { |
		ret = drm_syncobj_find_fence(file_priv, args->in_sync,
					     0, 0, &in_fence);
1187 | if (ret) |
1188 | goto fail; |
1189 | |
1190 | /* When the fence (or fence array) is exclusively from our |
1191 | * context we can skip the wait since jobs are executed in |
1192 | * order of their submission through this ioctl and this can |
1193 | * only have fences from a prior job. |
1194 | */ |
		if (!dma_fence_match_context(in_fence,
					     vc4->dma_fence_context)) {
			ret = dma_fence_wait(in_fence, true);
			if (ret) {
				dma_fence_put(in_fence);
1200 | goto fail; |
1201 | } |
1202 | } |
1203 | |
		dma_fence_put(in_fence);
1205 | } |
1206 | |
1207 | if (exec->args->bin_cl_size != 0) { |
1208 | ret = vc4_get_bcl(dev, exec); |
1209 | if (ret) |
1210 | goto fail; |
1211 | } else { |
1212 | exec->ct0ca = 0; |
1213 | exec->ct0ea = 0; |
1214 | } |
1215 | |
1216 | ret = vc4_get_rcl(dev, exec); |
1217 | if (ret) |
1218 | goto fail; |
1219 | |
	ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx);
1221 | if (ret) |
1222 | goto fail; |
1223 | |
1224 | if (args->out_sync) { |
		out_sync = drm_syncobj_find(file_priv, args->out_sync);
1226 | if (!out_sync) { |
1227 | ret = -EINVAL; |
1228 | goto fail; |
1229 | } |
1230 | |
1231 | /* We replace the fence in out_sync in vc4_queue_submit since |
1232 | * the render job could execute immediately after that call. |
1233 | * If it finishes before our ioctl processing resumes the |
1234 | * render job fence could already have been freed. |
1235 | */ |
1236 | } |
1237 | |
1238 | /* Clear this out of the struct we'll be putting in the queue, |
1239 | * since it's part of our stack. |
1240 | */ |
1241 | exec->args = NULL; |
1242 | |
	ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync);
1244 | |
1245 | /* The syncobj isn't part of the exec data and we need to free our |
1246 | * reference even if job submission failed. |
1247 | */ |
1248 | if (out_sync) |
		drm_syncobj_put(out_sync);
1250 | |
1251 | if (ret) |
1252 | goto fail; |
1253 | |
1254 | /* Return the seqno for our job. */ |
1255 | args->seqno = vc4->emit_seqno; |
1256 | |
1257 | return 0; |
1258 | |
1259 | fail: |
	vc4_complete_exec(&vc4->base, exec);
1261 | |
1262 | return ret; |
1263 | } |
1264 | |
1265 | static void vc4_gem_destroy(struct drm_device *dev, void *unused); |
1266 | int vc4_gem_init(struct drm_device *dev) |
1267 | { |
1268 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
1269 | int ret; |
1270 | |
1271 | if (WARN_ON_ONCE(vc4->is_vc5)) |
1272 | return -ENODEV; |
1273 | |
	vc4->dma_fence_context = dma_fence_context_alloc(1);

	INIT_LIST_HEAD(&vc4->bin_job_list);
	INIT_LIST_HEAD(&vc4->render_job_list);
	INIT_LIST_HEAD(&vc4->job_done_list);
	INIT_LIST_HEAD(&vc4->seqno_cb_list);
1280 | spin_lock_init(&vc4->job_lock); |
1281 | |
1282 | INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work); |
1283 | timer_setup(&vc4->hangcheck.timer, vc4_hangcheck_elapsed, 0); |
1284 | |
1285 | INIT_WORK(&vc4->job_done_work, vc4_job_done_work); |
1286 | |
1287 | ret = drmm_mutex_init(dev, &vc4->power_lock); |
1288 | if (ret) |
1289 | return ret; |
1290 | |
	INIT_LIST_HEAD(&vc4->purgeable.list);
1292 | |
1293 | ret = drmm_mutex_init(dev, &vc4->purgeable.lock); |
1294 | if (ret) |
1295 | return ret; |
1296 | |
1297 | return drmm_add_action_or_reset(dev, vc4_gem_destroy, NULL); |
1298 | } |
1299 | |
1300 | static void vc4_gem_destroy(struct drm_device *dev, void *unused) |
1301 | { |
1302 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
1303 | |
1304 | /* Waiting for exec to finish would need to be done before |
1305 | * unregistering V3D. |
1306 | */ |
1307 | WARN_ON(vc4->emit_seqno != vc4->finished_seqno); |
1308 | |
1309 | /* V3D should already have disabled its interrupt and cleared |
1310 | * the overflow allocation registers. Now free the object. |
1311 | */ |
1312 | if (vc4->bin_bo) { |
		drm_gem_object_put(&vc4->bin_bo->base.base);
1314 | vc4->bin_bo = NULL; |
1315 | } |
1316 | |
1317 | if (vc4->hang_state) |
		vc4_free_hang_state(dev, vc4->hang_state);
1319 | } |
1320 | |
1321 | int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data, |
1322 | struct drm_file *file_priv) |
1323 | { |
1324 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
1325 | struct drm_vc4_gem_madvise *args = data; |
1326 | struct drm_gem_object *gem_obj; |
1327 | struct vc4_bo *bo; |
1328 | int ret; |
1329 | |
1330 | if (WARN_ON_ONCE(vc4->is_vc5)) |
1331 | return -ENODEV; |
1332 | |
1333 | switch (args->madv) { |
1334 | case VC4_MADV_DONTNEED: |
1335 | case VC4_MADV_WILLNEED: |
1336 | break; |
1337 | default: |
1338 | return -EINVAL; |
1339 | } |
1340 | |
1341 | if (args->pad != 0) |
1342 | return -EINVAL; |
1343 | |
	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
1347 | return -ENOENT; |
1348 | } |
1349 | |
1350 | bo = to_vc4_bo(gem_obj); |
1351 | |
1352 | /* Only BOs exposed to userspace can be purged. */ |
1353 | if (bo->madv == __VC4_MADV_NOTSUPP) { |
		DRM_DEBUG("madvise not supported on this BO\n");
1355 | ret = -EINVAL; |
1356 | goto out_put_gem; |
1357 | } |
1358 | |
1359 | /* Not sure it's safe to purge imported BOs. Let's just assume it's |
1360 | * not until proven otherwise. |
1361 | */ |
1362 | if (gem_obj->import_attach) { |
		DRM_DEBUG("madvise not supported on imported BOs\n");
1364 | ret = -EINVAL; |
1365 | goto out_put_gem; |
1366 | } |
1367 | |
1368 | mutex_lock(&bo->madv_lock); |
1369 | |
1370 | if (args->madv == VC4_MADV_DONTNEED && bo->madv == VC4_MADV_WILLNEED && |
	    !refcount_read(&bo->usecnt)) {
1372 | /* If the BO is about to be marked as purgeable, is not used |
1373 | * and is not already purgeable or purged, add it to the |
1374 | * purgeable list. |
1375 | */ |
1376 | vc4_bo_add_to_purgeable_pool(bo); |
1377 | } else if (args->madv == VC4_MADV_WILLNEED && |
1378 | bo->madv == VC4_MADV_DONTNEED && |
		   !refcount_read(&bo->usecnt)) {
1380 | /* The BO has not been purged yet, just remove it from |
1381 | * the purgeable list. |
1382 | */ |
1383 | vc4_bo_remove_from_purgeable_pool(bo); |
1384 | } |
1385 | |
1386 | /* Save the purged state. */ |
1387 | args->retained = bo->madv != __VC4_MADV_PURGED; |
1388 | |
1389 | /* Update internal madv state only if the bo was not purged. */ |
1390 | if (bo->madv != __VC4_MADV_PURGED) |
1391 | bo->madv = args->madv; |
1392 | |
	mutex_unlock(&bo->madv_lock);
1394 | |
1395 | ret = 0; |
1396 | |
1397 | out_put_gem: |
	drm_gem_object_put(gem_obj);
1399 | |
1400 | return ret; |
1401 | } |
1402 | |