// SPDX-License-Identifier: GPL-2.0+
/* Copyright (C) 2014-2018 Broadcom */

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/reset.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>

#include <drm/drm_managed.h>
#include <drm/drm_syncobj.h>
#include <uapi/drm/v3d_drm.h>

#include "v3d_drv.h"
#include "v3d_regs.h"
#include "v3d_trace.h"

static void
v3d_init_core(struct v3d_dev *v3d, int core)
{
	/* Set OVRTMUOUT, which means that the texture sampler uniform
	 * configuration's tmu output type field is used, instead of
	 * using the hardware default behavior based on the texture
	 * type.  If you want the default behavior, you can still put
	 * "2" in the indirect texture state's output_type field.
	 */
	if (v3d->ver < 40)
		V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT);

	/* Whenever we flush the L2T cache, we always want to flush
	 * the whole thing.
	 */
	V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0);
	V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0);
}

/* Sets invariant state for the HW. */
static void
v3d_init_hw_state(struct v3d_dev *v3d)
{
	v3d_init_core(v3d, 0);
}

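/* Asks the GMP to stop accepting new AXI transactions, then waits for
 * any outstanding reads and writes to drain so the core can be safely
 * powered down.
 */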
static void
v3d_idle_axi(struct v3d_dev *v3d, int core)
{
	V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ);

	if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) &
		      (V3D_GMP_STATUS_RD_COUNT_MASK |
		       V3D_GMP_STATUS_WR_COUNT_MASK |
		       V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) {
		DRM_ERROR("Failed to wait for safe GMP shutdown\n");
	}
}

static void
v3d_idle_gca(struct v3d_dev *v3d)
{
	if (v3d->ver >= 41)
		return;

	V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN);

	if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) &
		      V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) ==
		     V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) {
		DRM_ERROR("Failed to wait for safe GCA shutdown\n");
	}
}

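/* Resets V3D through the GR bridge registers, for platforms where the
 * reset is not exposed through a shared reset controller (see
 * v3d_reset_v3d()).
 */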
static void
v3d_reset_by_bridge(struct v3d_dev *v3d)
{
	int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION);

	if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) {
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0,
				 V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0);

		/* GFXH-1383: The SW_INIT may cause a stray write to address 0
		 * of the unit, so reset it to its power-on value here.
		 */
		V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK);
	} else {
		WARN_ON_ONCE(V3D_GET_FIELD(version,
					   V3D_TOP_GR_BRIDGE_MAJOR) != 7);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1,
				 V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0);
	}
}

static void
v3d_reset_v3d(struct v3d_dev *v3d)
{
	if (v3d->reset)
		reset_control_reset(v3d->reset);
	else
		v3d_reset_by_bridge(v3d);

	v3d_init_hw_state(v3d);
}

void
v3d_reset(struct v3d_dev *v3d)
{
	struct drm_device *dev = &v3d->drm;

	DRM_DEV_ERROR(dev->dev, "Resetting GPU for hang.\n");
	DRM_DEV_ERROR(dev->dev, "V3D_ERR_STAT: 0x%08x\n",
		      V3D_CORE_READ(0, V3D_ERR_STAT));
	trace_v3d_reset_begin(dev);

	/* XXX: only needed for safe powerdown, not reset. */
	if (false)
		v3d_idle_axi(v3d, 0);

	v3d_idle_gca(v3d);
	v3d_reset_v3d(v3d);

	v3d_mmu_set_page_table(v3d);
	v3d_irq_reset(v3d);

	v3d_perfmon_stop(v3d, v3d->active_perfmon, false);

	trace_v3d_reset_end(dev);
}

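/* Flushes the GPU's L3 cache through the GCA.  V3D 4.1+ has no GCA, so
 * this is a no-op there.
 */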
static void
v3d_flush_l3(struct v3d_dev *v3d)
{
	if (v3d->ver < 41) {
		u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL);

		V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
			      gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH);

		if (v3d->ver < 33) {
			V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
				      gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH);
		}
	}
}

/* Invalidates the (read-only) L2C cache.  This was the L2 cache for
 * uniforms and instructions on V3D 3.2.
 */
static void
v3d_invalidate_l2c(struct v3d_dev *v3d, int core)
{
	if (v3d->ver > 32)
		return;

	V3D_CORE_WRITE(core, V3D_CTL_L2CACTL,
		       V3D_L2CACTL_L2CCLR |
		       V3D_L2CACTL_L2CENA);
}

/* Invalidates texture L2 cachelines */
static void
v3d_flush_l2t(struct v3d_dev *v3d, int core)
{
	/* While there is a busy bit (V3D_L2TCACTL_L2TFLS), we don't
	 * need to wait for completion before dispatching the job --
	 * L2T accesses will be stalled until the flush has completed.
	 * However, we do need to make sure we don't try to trigger a
	 * new flush while the L2_CLEAN queue is trying to
	 * synchronously clean after a job.
	 */
	mutex_lock(&v3d->cache_clean_lock);
	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
		       V3D_L2TCACTL_L2TFLS |
		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
	mutex_unlock(&v3d->cache_clean_lock);
}

/* Cleans texture L1 and L2 cachelines (writing back dirty data).
 *
 * For cleaning, which happens from the CACHE_CLEAN queue after CSD has
 * executed, we need to make sure that the clean is done before
 * signaling job completion.  So, we synchronously wait before
 * returning, and we make sure that L2 invalidates don't happen in the
 * meantime to confuse our are-we-done checks.
 */
void
v3d_clean_caches(struct v3d_dev *v3d)
{
	struct drm_device *dev = &v3d->drm;
	int core = 0;

	trace_v3d_cache_clean_begin(dev);

	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
		       V3D_L2TCACTL_TMUWCF), 100)) {
		DRM_ERROR("Timeout waiting for TMU write combiner flush\n");
	}

	mutex_lock(&v3d->cache_clean_lock);
	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
		       V3D_L2TCACTL_L2TFLS |
		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAN, V3D_L2TCACTL_FLM));

	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
		       V3D_L2TCACTL_L2TFLS), 100)) {
		DRM_ERROR("Timeout waiting for L2T clean\n");
	}

	mutex_unlock(&v3d->cache_clean_lock);

	trace_v3d_cache_clean_end(dev);
}

/* Invalidates the slice caches.  These are read-only caches. */
static void
v3d_invalidate_slices(struct v3d_dev *v3d, int core)
{
	V3D_CORE_WRITE(core, V3D_CTL_SLCACTL,
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

void
v3d_invalidate_caches(struct v3d_dev *v3d)
{
	/* Invalidate the caches from the outside in.  That way if
	 * another CL's concurrent use of nearby memory were to pull
	 * an invalidated cacheline back in, we wouldn't leave stale
	 * data in the inner cache.
	 */
	v3d_flush_l3(v3d);
	v3d_invalidate_l2c(v3d, 0);
	v3d_flush_l2t(v3d, 0);
	v3d_invalidate_slices(v3d, 0);
}

/* Takes the reservation lock on all the BOs being referenced, so that
 * at queue submit time we can update the reservations.
 *
 * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
 * (all of which are on exec->unref_list).  They're entirely private
 * to v3d, so we don't attach dma-buf fences to them.
 */
static int
v3d_lock_bo_reservations(struct v3d_job *job,
			 struct ww_acquire_ctx *acquire_ctx)
{
	int i, ret;

	ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx);
	if (ret)
		return ret;

	for (i = 0; i < job->bo_count; i++) {
		ret = dma_resv_reserve_fences(job->bo[i]->resv, 1);
		if (ret)
			goto fail;

		ret = drm_sched_job_add_implicit_dependencies(&job->base,
							      job->bo[i], true);
		if (ret)
			goto fail;
	}

	return 0;

fail:
	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
	return ret;
}

/**
 * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
 * referenced by the job.
 * @dev: DRM device
 * @file_priv: DRM file for this fd
 * @job: V3D job being set up
 * @bo_handles: GEM handles
 * @bo_count: Number of GEM handles passed in
 *
 * The command validator needs to reference BOs by their index within
 * the submitted job's BO list.  This does the validation of the job's
 * BO list and reference counting for the lifetime of the job.
 *
 * Note that this function doesn't need to unreference the BOs on
 * failure, because that will happen at v3d_job_free() time.
 */
static int
v3d_lookup_bos(struct drm_device *dev,
	       struct drm_file *file_priv,
	       struct v3d_job *job,
	       u64 bo_handles,
	       u32 bo_count)
{
	job->bo_count = bo_count;

	if (!job->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_DEBUG("Rendering requires BOs\n");
		return -EINVAL;
	}

	return drm_gem_objects_lookup(file_priv,
				      (void __user *)(uintptr_t)bo_handles,
				      job->bo_count, &job->bo);
}

static void
v3d_job_free(struct kref *ref)
{
	struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
	int i;

	if (job->bo) {
		for (i = 0; i < job->bo_count; i++)
			drm_gem_object_put(job->bo[i]);
		kvfree(job->bo);
	}

	dma_fence_put(job->irq_fence);
	dma_fence_put(job->done_fence);

	if (job->perfmon)
		v3d_perfmon_put(job->perfmon);

	kfree(job);
}

static void
v3d_render_job_free(struct kref *ref)
{
	struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
						  base.refcount);
	struct v3d_bo *bo, *save;

	list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
		drm_gem_object_put(&bo->base.base);
	}

	v3d_job_free(ref);
}

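/* Cleans up the scheduler job state and drops the submission's
 * reference to the job.  Safe to call with a NULL job, so the ioctl
 * error paths can call it unconditionally.
 */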
void v3d_job_cleanup(struct v3d_job *job)
{
	if (!job)
		return;

	drm_sched_job_cleanup(&job->base);
	v3d_job_put(job);
}

void v3d_job_put(struct v3d_job *job)
{
	kref_put(&job->refcount, job->free);
}

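/* Waits for all fences on the BO's reservation object to signal, with a
 * user-supplied timeout.  The remaining timeout is written back to the
 * args so that a restarted ioctl doesn't wait the full period again.
 */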
int
v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	int ret;
	struct drm_v3d_wait_bo *args = data;
	ktime_t start = ktime_get();
	u64 delta_ns;
	unsigned long timeout_jiffies =
		nsecs_to_jiffies_timeout(args->timeout_ns);

	if (args->pad != 0)
		return -EINVAL;

	ret = drm_gem_dma_resv_wait(file_priv, args->handle,
				    true, timeout_jiffies);

	/* Decrement the user's timeout, in case we got interrupted
	 * such that the ioctl will be restarted.
	 */
	delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start));
	if (delta_ns < args->timeout_ns)
		args->timeout_ns -= delta_ns;
	else
		args->timeout_ns = 0;

	/* Asked to wait beyond the jiffy/scheduler precision? */
	if (ret == -ETIME && args->timeout_ns)
		ret = -EAGAIN;

	return ret;
}

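/* Allocates the job container, initializes the scheduler job, and wires
 * up the submission's input dependencies: either the syncobjs from the
 * multisync extension (when this queue is the wait stage) or the single
 * in_sync handle.  On failure nothing is left for the caller to clean
 * up; on success the caller owns a reference released by v3d_job_put().
 */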
static int
v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
	     void **container, size_t size, void (*free)(struct kref *ref),
	     u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue)
{
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct v3d_job *job;
	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
	int ret, i;

	*container = kcalloc(1, size, GFP_KERNEL);
	if (!*container) {
		DRM_ERROR("Cannot allocate memory for v3d job.");
		return -ENOMEM;
	}

	job = *container;
	job->v3d = v3d;
	job->free = free;

	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
				 v3d_priv);
	if (ret)
		goto fail;

	if (has_multisync) {
		if (se->in_sync_count && se->wait_stage == queue) {
			struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs);

			for (i = 0; i < se->in_sync_count; i++) {
				struct drm_v3d_sem in;

				if (copy_from_user(&in, handle++, sizeof(in))) {
					ret = -EFAULT;
					DRM_DEBUG("Failed to copy wait dep handle.\n");
					goto fail_deps;
				}
				ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in.handle, 0);

				// TODO: Investigate why this was filtered out for the IOCTL.
				if (ret && ret != -ENOENT)
					goto fail_deps;
			}
		}
	} else {
		ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in_sync, 0);

		// TODO: Investigate why this was filtered out for the IOCTL.
		if (ret && ret != -ENOENT)
			goto fail_deps;
	}

	kref_init(&job->refcount);

	return 0;

fail_deps:
	drm_sched_job_cleanup(&job->base);
fail:
	kfree(*container);
	*container = NULL;

	return ret;
}

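/* Arms the scheduler job (which makes done_fence valid) and queues it
 * to run.  An extra job reference is taken here and dropped by the
 * scheduler when the job completes.
 */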
static void
v3d_push_job(struct v3d_job *job)
{
	drm_sched_job_arm(&job->base);

	job->done_fence = dma_fence_get(&job->base.s_fence->finished);

	/* put by scheduler job completion */
	kref_get(&job->refcount);

	drm_sched_entity_push_job(&job->base);
}

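/* Attaches the job's done fence to every BO's reservation as a write
 * fence, drops the reservation locks, and installs the fence in the
 * output syncobjs: either the single out_sync handle or all of the
 * multisync extension's out_syncs.
 */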
static void
v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
					 struct v3d_job *job,
					 struct ww_acquire_ctx *acquire_ctx,
					 u32 out_sync,
					 struct v3d_submit_ext *se,
					 struct dma_fence *done_fence)
{
	struct drm_syncobj *sync_out;
	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
	int i;

	for (i = 0; i < job->bo_count; i++) {
		/* XXX: Use shared fences for read-only objects. */
		dma_resv_add_fence(job->bo[i]->resv, job->done_fence,
				   DMA_RESV_USAGE_WRITE);
	}

	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);

	/* Update the return sync object for the job. */
	/* If the job only carries a single signal semaphore: */
	if (!has_multisync) {
		sync_out = drm_syncobj_find(file_priv, out_sync);
		if (sync_out) {
			drm_syncobj_replace_fence(sync_out, done_fence);
			drm_syncobj_put(sync_out);
		}
		return;
	}

	/* Otherwise, signal all of the multisync extension's syncobjs. */
	if (se->out_sync_count) {
		for (i = 0; i < se->out_sync_count; i++) {
			drm_syncobj_replace_fence(se->out_syncs[i].syncobj,
						  done_fence);
			drm_syncobj_put(se->out_syncs[i].syncobj);
		}
		kvfree(se->out_syncs);
	}
}

static void
v3d_put_multisync_post_deps(struct v3d_submit_ext *se)
{
	unsigned int i;

	if (!(se && se->out_sync_count))
		return;

	for (i = 0; i < se->out_sync_count; i++)
		drm_syncobj_put(se->out_syncs[i].syncobj);
	kvfree(se->out_syncs);
}

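/* Copies the user's array of post-dependency syncobj handles and
 * resolves each one to a drm_syncobj, dropping any references already
 * taken if a lookup fails partway through.
 */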
static int
v3d_get_multisync_post_deps(struct drm_file *file_priv,
			    struct v3d_submit_ext *se,
			    u32 count, u64 handles)
{
	struct drm_v3d_sem __user *post_deps;
	int i, ret;

	if (!count)
		return 0;

	se->out_syncs = (struct v3d_submit_outsync *)
			kvmalloc_array(count,
				       sizeof(struct v3d_submit_outsync),
				       GFP_KERNEL);
	if (!se->out_syncs)
		return -ENOMEM;

	post_deps = u64_to_user_ptr(handles);

	for (i = 0; i < count; i++) {
		struct drm_v3d_sem out;

		if (copy_from_user(&out, post_deps++, sizeof(out))) {
			ret = -EFAULT;
			DRM_DEBUG("Failed to copy post dep handles\n");
			goto fail;
		}

		se->out_syncs[i].syncobj = drm_syncobj_find(file_priv,
							    out.handle);
		if (!se->out_syncs[i].syncobj) {
			ret = -EINVAL;
			goto fail;
		}
	}
	se->out_sync_count = count;

	return 0;

fail:
	for (i--; i >= 0; i--)
		drm_syncobj_put(se->out_syncs[i].syncobj);
	kvfree(se->out_syncs);

	return ret;
}

/* Gets the data for the multiple-binary-semaphore synchronization
 * extension: the syncobjs to wait on before running and the syncobjs
 * to be signaled when the job completes (out_syncs).
 */
static int
v3d_get_multisync_submit_deps(struct drm_file *file_priv,
			      struct drm_v3d_extension __user *ext,
			      void *data)
{
	struct drm_v3d_multi_sync multisync;
	struct v3d_submit_ext *se = data;
	int ret;

	if (copy_from_user(&multisync, ext, sizeof(multisync)))
		return -EFAULT;

	if (multisync.pad)
		return -EINVAL;

	ret = v3d_get_multisync_post_deps(file_priv, data, multisync.out_sync_count,
					  multisync.out_syncs);
	if (ret)
		return ret;

	se->in_sync_count = multisync.in_sync_count;
	se->in_syncs = multisync.in_syncs;
	se->flags |= DRM_V3D_EXT_ID_MULTI_SYNC;
	se->wait_stage = multisync.wait_stage;

	return 0;
}

/* When userspace attaches ioctl extensions, v3d_get_extensions() walks
 * the extension chain and parses each entry according to its id (name).
 */
static int
v3d_get_extensions(struct drm_file *file_priv,
		   u64 ext_handles,
		   void *data)
{
	struct drm_v3d_extension __user *user_ext;
	int ret;

	user_ext = u64_to_user_ptr(ext_handles);
	while (user_ext) {
		struct drm_v3d_extension ext;

		if (copy_from_user(&ext, user_ext, sizeof(ext))) {
			DRM_DEBUG("Failed to copy submit extension\n");
			return -EFAULT;
		}

		switch (ext.id) {
		case DRM_V3D_EXT_ID_MULTI_SYNC:
			ret = v3d_get_multisync_submit_deps(file_priv, user_ext, data);
			if (ret)
				return ret;
			break;
		default:
			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
			return -EINVAL;
		}

		user_ext = u64_to_user_ptr(ext.next);
	}

	return 0;
}

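/* For reference, a minimal userspace submission might look like the
 * sketch below.  This is a hedged example, not part of the driver: it
 * assumes the uapi structs from include/uapi/drm/v3d_drm.h, and that
 * the caller has already built the CLs and the BO handle array.
 *
 *	struct drm_v3d_submit_cl args = {
 *		.bcl_start = bcl_start, .bcl_end = bcl_end,
 *		.rcl_start = rcl_start, .rcl_end = rcl_end,
 *		.bo_handles = (uintptr_t)bo_handles,
 *		.bo_handle_count = bo_handle_count,
 *		.out_sync = out_syncobj,
 *	};
 *	ret = ioctl(fd, DRM_IOCTL_V3D_SUBMIT_CL, &args);
 */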
/**
 * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * This is the main entrypoint for userspace to submit a 3D frame to
 * the GPU.  Userspace provides the binner command list (if
 * applicable), and the kernel sets up the render command list to draw
 * to the framebuffer described in the ioctl, using the command lists
 * that the 3D engine's binner will produce.
 */
int
v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_cl *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_bin_job *bin = NULL;
	struct v3d_render_job *render = NULL;
	struct v3d_job *clean_job = NULL;
	struct v3d_job *last_job;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);

	if (args->pad)
		return -EINVAL;

	if (args->flags &&
	    args->flags & ~(DRM_V3D_SUBMIT_CL_FLUSH_CACHE |
			    DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_init(v3d, file_priv, (void *)&render, sizeof(*render),
			   v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER);
	if (ret)
		goto fail;

	render->start = args->rcl_start;
	render->end = args->rcl_end;
	INIT_LIST_HEAD(&render->unref_list);

	if (args->bcl_start != args->bcl_end) {
		ret = v3d_job_init(v3d, file_priv, (void *)&bin, sizeof(*bin),
				   v3d_job_free, args->in_sync_bcl, &se, V3D_BIN);
		if (ret)
			goto fail;

		bin->start = args->bcl_start;
		bin->end = args->bcl_end;
		bin->qma = args->qma;
		bin->qms = args->qms;
		bin->qts = args->qts;
		bin->render = render;
	}

	if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
		ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job),
				   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
		if (ret)
			goto fail;

		last_job = clean_job;
	} else {
		last_job = &render->base;
	}

	ret = v3d_lookup_bos(dev, file_priv, last_job,
			     args->bo_handles, args->bo_handle_count);
	if (ret)
		goto fail;

	ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
	if (ret)
		goto fail;

	if (args->perfmon_id) {
		render->base.perfmon = v3d_perfmon_find(v3d_priv,
							args->perfmon_id);

		if (!render->base.perfmon) {
			ret = -ENOENT;
			goto fail_perfmon;
		}
	}

	mutex_lock(&v3d->sched_lock);
	if (bin) {
		bin->base.perfmon = render->base.perfmon;
		v3d_perfmon_get(bin->base.perfmon);
		v3d_push_job(&bin->base);

		ret = drm_sched_job_add_dependency(&render->base.base,
						   dma_fence_get(bin->base.done_fence));
		if (ret)
			goto fail_unreserve;
	}

	v3d_push_job(&render->base);

	if (clean_job) {
		struct dma_fence *render_fence =
			dma_fence_get(render->base.done_fence);
		ret = drm_sched_job_add_dependency(&clean_job->base,
						   render_fence);
		if (ret)
			goto fail_unreserve;
		clean_job->perfmon = render->base.perfmon;
		v3d_perfmon_get(clean_job->perfmon);
		v3d_push_job(clean_job);
	}

	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 last_job,
						 &acquire_ctx,
						 args->out_sync,
						 &se,
						 last_job->done_fence);

	if (bin)
		v3d_job_put(&bin->base);
	v3d_job_put(&render->base);
	if (clean_job)
		v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
fail_perfmon:
	drm_gem_unlock_reservations(last_job->bo,
				    last_job->bo_count, &acquire_ctx);
fail:
	v3d_job_cleanup((void *)bin);
	v3d_job_cleanup((void *)render);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

/**
 * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the TFU, which we don't
 * need to validate since the TFU is behind the MMU.
 */
int
v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct drm_v3d_submit_tfu *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_tfu_job *job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_DEBUG("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job),
			   v3d_job_free, args->in_sync, &se, V3D_TFU);
	if (ret)
		goto fail;

	job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
			       sizeof(*job->base.bo), GFP_KERNEL);
	if (!job->base.bo) {
		ret = -ENOMEM;
		goto fail;
	}

	job->args = *args;

	for (job->base.bo_count = 0;
	     job->base.bo_count < ARRAY_SIZE(args->bo_handles);
	     job->base.bo_count++) {
		struct drm_gem_object *bo;

		if (!args->bo_handles[job->base.bo_count])
			break;

		bo = drm_gem_object_lookup(file_priv, args->bo_handles[job->base.bo_count]);
		if (!bo) {
			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
				  job->base.bo_count,
				  args->bo_handles[job->base.bo_count]);
			ret = -ENOENT;
			goto fail;
		}
		job->base.bo[job->base.bo_count] = bo;
	}

	ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
	if (ret)
		goto fail;

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&job->base);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 &job->base, &acquire_ctx,
						 args->out_sync,
						 &se,
						 job->base.done_fence);

	v3d_job_put(&job->base);

	return 0;

fail:
	v3d_job_cleanup((void *)job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

/**
 * v3d_submit_csd_ioctl() - Submits a CSD (compute shader dispatch) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the CSD, which we don't
 * need to validate since the CSD is behind the MMU.
 */
int
v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_csd *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_csd_job *job = NULL;
	struct v3d_job *clean_job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret;

	trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]);

	if (args->pad)
		return -EINVAL;

	if (!v3d_has_csd(v3d)) {
		DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n");
		return -EINVAL;
	}

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job),
			   v3d_job_free, args->in_sync, &se, V3D_CSD);
	if (ret)
		goto fail;

	ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job),
			   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
	if (ret)
		goto fail;

	job->args = *args;

	ret = v3d_lookup_bos(dev, file_priv, clean_job,
			     args->bo_handles, args->bo_handle_count);
	if (ret)
		goto fail;

	ret = v3d_lock_bo_reservations(clean_job, &acquire_ctx);
	if (ret)
		goto fail;

	if (args->perfmon_id) {
		job->base.perfmon = v3d_perfmon_find(v3d_priv,
						     args->perfmon_id);
		if (!job->base.perfmon) {
			ret = -ENOENT;
			goto fail_perfmon;
		}
	}

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&job->base);

	ret = drm_sched_job_add_dependency(&clean_job->base,
					   dma_fence_get(job->base.done_fence));
	if (ret)
		goto fail_unreserve;

	v3d_push_job(clean_job);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 clean_job,
						 &acquire_ctx,
						 args->out_sync,
						 &se,
						 clean_job->done_fence);

	v3d_job_put(&job->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
fail_perfmon:
	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
				    &acquire_ctx);
fail:
	v3d_job_cleanup((void *)job);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

int
v3d_gem_init(struct drm_device *dev)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
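	/* 4MB of page table holds one u32 entry per 4KB page, which
	 * covers the MMU's full 4GB address space.
	 */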
	u32 pt_size = 4096 * 1024;
	int ret, i;

	for (i = 0; i < V3D_MAX_QUEUES; i++)
		v3d->queue[i].fence_context = dma_fence_context_alloc(1);

	spin_lock_init(&v3d->mm_lock);
	spin_lock_init(&v3d->job_lock);
	ret = drmm_mutex_init(dev, &v3d->bo_lock);
	if (ret)
		return ret;
	ret = drmm_mutex_init(dev, &v3d->reset_lock);
	if (ret)
		return ret;
	ret = drmm_mutex_init(dev, &v3d->sched_lock);
	if (ret)
		return ret;
	ret = drmm_mutex_init(dev, &v3d->cache_clean_lock);
	if (ret)
		return ret;

	/* Note: We don't allocate address 0.  Various bits of HW
	 * treat 0 as special, such as the occlusion query counters
	 * where 0 means "disabled".
	 */
	drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1);

	v3d->pt = dma_alloc_wc(v3d->drm.dev, pt_size,
			       &v3d->pt_paddr,
			       GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
	if (!v3d->pt) {
		drm_mm_takedown(&v3d->mm);
		dev_err(v3d->drm.dev,
			"Failed to allocate page tables. Please ensure you have DMA enabled.\n");
		return -ENOMEM;
	}

	v3d_init_hw_state(v3d);
	v3d_mmu_set_page_table(v3d);

	ret = v3d_sched_init(v3d);
	if (ret) {
		drm_mm_takedown(&v3d->mm);
		dma_free_coherent(v3d->drm.dev, pt_size, (void *)v3d->pt,
				  v3d->pt_paddr);
		return ret;
	}

	return 0;
}

void
v3d_gem_destroy(struct drm_device *dev)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);

	v3d_sched_fini(v3d);

	/* Waiting for jobs to finish would need to be done before
	 * unregistering V3D.
	 */
	WARN_ON(v3d->bin_job);
	WARN_ON(v3d->render_job);

	drm_mm_takedown(&v3d->mm);

	dma_free_coherent(v3d->drm.dev, 4096 * 1024, (void *)v3d->pt,
			  v3d->pt_paddr);
}