// SPDX-License-Identifier: GPL-2.0+
/* Copyright (C) 2014-2018 Broadcom */

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/reset.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>

#include <drm/drm_managed.h>
#include <drm/drm_syncobj.h>
#include <uapi/drm/v3d_drm.h>

#include "v3d_drv.h"
#include "v3d_regs.h"
#include "v3d_trace.h"

static void
v3d_init_core(struct v3d_dev *v3d, int core)
{
	/* Set OVRTMUOUT, which means that the texture sampler uniform
	 * configuration's tmu output type field is used, instead of
	 * using the hardware default behavior based on the texture
	 * type.  If you want the default behavior, you can still put
	 * "2" in the indirect texture state's output_type field.
	 */
	if (v3d->ver < 40)
		V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT);

	/* Whenever we flush the L2T cache, we always want to flush
	 * the whole thing.
	 */
	V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0);
	V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0);
}

/* Sets invariant state for the HW. */
static void
v3d_init_hw_state(struct v3d_dev *v3d)
{
	v3d_init_core(v3d, 0);
}

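/* Asks the GMP to stop accepting new AXI transactions, then waits for
 * any outstanding reads and writes to drain so the core can be safely
 * powered down.
 */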
static void
v3d_idle_axi(struct v3d_dev *v3d, int core)
{
	V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ);

	if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) &
		      (V3D_GMP_STATUS_RD_COUNT_MASK |
		       V3D_GMP_STATUS_WR_COUNT_MASK |
		       V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) {
		DRM_ERROR("Failed to wait for safe GMP shutdown\n");
	}
}

static void
v3d_idle_gca(struct v3d_dev *v3d)
{
	if (v3d->ver >= 41)
		return;

	V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN);

	if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) &
		      V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) ==
		     V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) {
		DRM_ERROR("Failed to wait for safe GCA shutdown\n");
	}
}

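/* Resets V3D through the GR bridge registers, for platforms where the
 * reset is not exposed through a shared reset controller (see
 * v3d_reset_v3d()).
 */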
static void
v3d_reset_by_bridge(struct v3d_dev *v3d)
{
	int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION);

	if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) {
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0,
				 V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0);

		/* GFXH-1383: The SW_INIT may cause a stray write to address 0
		 * of the unit, so reset it to its power-on value here.
		 */
		V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK);
	} else {
		WARN_ON_ONCE(V3D_GET_FIELD(version,
					   V3D_TOP_GR_BRIDGE_MAJOR) != 7);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1,
				 V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0);
	}
}

static void
v3d_reset_v3d(struct v3d_dev *v3d)
{
	if (v3d->reset)
		reset_control_reset(v3d->reset);
	else
		v3d_reset_by_bridge(v3d);

	v3d_init_hw_state(v3d);
}

void
v3d_reset(struct v3d_dev *v3d)
{
	struct drm_device *dev = &v3d->drm;

	DRM_DEV_ERROR(dev->dev, "Resetting GPU for hang.\n");
	DRM_DEV_ERROR(dev->dev, "V3D_ERR_STAT: 0x%08x\n",
		      V3D_CORE_READ(0, V3D_ERR_STAT));
	trace_v3d_reset_begin(dev);

	/* XXX: only needed for safe powerdown, not reset. */
	if (false)
		v3d_idle_axi(v3d, 0);

	v3d_idle_gca(v3d);
	v3d_reset_v3d(v3d);

	v3d_mmu_set_page_table(v3d);
	v3d_irq_reset(v3d);

	v3d_perfmon_stop(v3d, v3d->active_perfmon, false);

	trace_v3d_reset_end(dev);
}

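/* Flushes the GPU's L3 cache through the GCA.  V3D 4.1+ has no GCA, so
 * this is a no-op there.
 */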
static void
v3d_flush_l3(struct v3d_dev *v3d)
{
	if (v3d->ver < 41) {
		u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL);

		V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
			      gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH);

		if (v3d->ver < 33) {
			V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
				      gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH);
		}
	}
}

/* Invalidates the (read-only) L2C cache.  This was the L2 cache for
 * uniforms and instructions on V3D 3.2.
 */
static void
v3d_invalidate_l2c(struct v3d_dev *v3d, int core)
{
	if (v3d->ver > 32)
		return;

	V3D_CORE_WRITE(core, V3D_CTL_L2CACTL,
		       V3D_L2CACTL_L2CCLR |
		       V3D_L2CACTL_L2CENA);
}

/* Invalidates texture L2 cachelines */
static void
v3d_flush_l2t(struct v3d_dev *v3d, int core)
{
	/* While there is a busy bit (V3D_L2TCACTL_L2TFLS), we don't
	 * need to wait for completion before dispatching the job --
	 * L2T accesses will be stalled until the flush has completed.
	 * However, we do need to make sure we don't try to trigger a
	 * new flush while the L2_CLEAN queue is trying to
	 * synchronously clean after a job.
	 */
	mutex_lock(&v3d->cache_clean_lock);
	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
		       V3D_L2TCACTL_L2TFLS |
		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
	mutex_unlock(&v3d->cache_clean_lock);
}

/* Cleans texture L1 and L2 cachelines (writing back dirty data).
 *
 * For cleaning, which happens from the CACHE_CLEAN queue after CSD has
 * executed, we need to make sure that the clean is done before
 * signaling job completion.  So, we synchronously wait before
 * returning, and we make sure that L2 invalidates don't happen in the
 * meantime to confuse our are-we-done checks.
 */
void
v3d_clean_caches(struct v3d_dev *v3d)
{
	struct drm_device *dev = &v3d->drm;
	int core = 0;

	trace_v3d_cache_clean_begin(dev);

	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
		       V3D_L2TCACTL_TMUWCF), 100)) {
		DRM_ERROR("Timeout waiting for TMU write combiner flush\n");
	}

	mutex_lock(&v3d->cache_clean_lock);
	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
		       V3D_L2TCACTL_L2TFLS |
		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAN, V3D_L2TCACTL_FLM));

	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
		       V3D_L2TCACTL_L2TFLS), 100)) {
		DRM_ERROR("Timeout waiting for L2T clean\n");
	}

	mutex_unlock(&v3d->cache_clean_lock);

	trace_v3d_cache_clean_end(dev);
}

/* Invalidates the slice caches.  These are read-only caches. */
static void
v3d_invalidate_slices(struct v3d_dev *v3d, int core)
{
	V3D_CORE_WRITE(core, V3D_CTL_SLCACTL,
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

void
v3d_invalidate_caches(struct v3d_dev *v3d)
{
	/* Invalidate the caches from the outside in.  That way if
	 * another CL's concurrent use of nearby memory were to pull
	 * an invalidated cacheline back in, we wouldn't leave stale
	 * data in the inner cache.
	 */
	v3d_flush_l3(v3d);
	v3d_invalidate_l2c(v3d, 0);
	v3d_flush_l2t(v3d, 0);
	v3d_invalidate_slices(v3d, 0);
}

/* Takes the reservation lock on all the BOs being referenced, so that
 * at queue submit time we can update the reservations.
 *
 * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
 * (all of which are on exec->unref_list).  They're entirely private
 * to v3d, so we don't attach dma-buf fences to them.
 */
static int
v3d_lock_bo_reservations(struct v3d_job *job,
			 struct ww_acquire_ctx *acquire_ctx)
{
	int i, ret;

	ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx);
	if (ret)
		return ret;

	for (i = 0; i < job->bo_count; i++) {
		ret = dma_resv_reserve_fences(job->bo[i]->resv, 1);
		if (ret)
			goto fail;

		ret = drm_sched_job_add_implicit_dependencies(&job->base,
							      job->bo[i], true);
		if (ret)
			goto fail;
	}

	return 0;

fail:
	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
	return ret;
}

/**
 * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
 * referenced by the job.
 * @dev: DRM device
 * @file_priv: DRM file for this fd
 * @job: V3D job being set up
 * @bo_handles: GEM handles
 * @bo_count: Number of GEM handles passed in
 *
 * The command validator needs to reference BOs by their index within
 * the submitted job's BO list.  This does the validation of the job's
 * BO list and reference counting for the lifetime of the job.
 *
 * Note that this function doesn't need to unreference the BOs on
 * failure, because that will happen at v3d_job_free() time.
 */
static int
v3d_lookup_bos(struct drm_device *dev,
	       struct drm_file *file_priv,
	       struct v3d_job *job,
	       u64 bo_handles,
	       u32 bo_count)
{
	job->bo_count = bo_count;

	if (!job->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_DEBUG("Rendering requires BOs\n");
		return -EINVAL;
	}

	return drm_gem_objects_lookup(file_priv,
				      (void __user *)(uintptr_t)bo_handles,
				      job->bo_count, &job->bo);
}

static void
v3d_job_free(struct kref *ref)
{
	struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
	int i;

	if (job->bo) {
		for (i = 0; i < job->bo_count; i++)
			drm_gem_object_put(job->bo[i]);
		kvfree(job->bo);
	}

	dma_fence_put(job->irq_fence);
	dma_fence_put(job->done_fence);

	if (job->perfmon)
		v3d_perfmon_put(job->perfmon);

	kfree(job);
}

static void
v3d_render_job_free(struct kref *ref)
{
	struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
						  base.refcount);
	struct v3d_bo *bo, *save;

	list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
		drm_gem_object_put(&bo->base.base);
	}

	v3d_job_free(ref);
}

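/* Cleans up the scheduler job state and drops the submission's
 * reference to the job.  Safe to call with a NULL job, so the ioctl
 * error paths can call it unconditionally.
 */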
void v3d_job_cleanup(struct v3d_job *job)
{
	if (!job)
		return;

	drm_sched_job_cleanup(&job->base);
	v3d_job_put(job);
}

void v3d_job_put(struct v3d_job *job)
{
	kref_put(&job->refcount, job->free);
}

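/* Waits for all fences on the BO's reservation object to signal, with a
 * user-supplied timeout.  The remaining timeout is written back to the
 * args so that a restarted ioctl doesn't wait the full period again.
 */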
int
v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	int ret;
	struct drm_v3d_wait_bo *args = data;
	ktime_t start = ktime_get();
	u64 delta_ns;
	unsigned long timeout_jiffies =
		nsecs_to_jiffies_timeout(args->timeout_ns);

	if (args->pad != 0)
		return -EINVAL;

	ret = drm_gem_dma_resv_wait(file_priv, args->handle,
				    true, timeout_jiffies);

	/* Decrement the user's timeout, in case we got interrupted
	 * such that the ioctl will be restarted.
	 */
	delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start));
	if (delta_ns < args->timeout_ns)
		args->timeout_ns -= delta_ns;
	else
		args->timeout_ns = 0;

	/* Asked to wait beyond the jiffy/scheduler precision? */
	if (ret == -ETIME && args->timeout_ns)
		ret = -EAGAIN;

	return ret;
}

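/* Allocates the job container, initializes the scheduler job, and wires
 * up the submission's input dependencies: either the syncobjs from the
 * multisync extension (when this queue is the wait stage) or the single
 * in_sync handle.  On failure nothing is left for the caller to clean
 * up; on success the caller owns a reference released by v3d_job_put().
 */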
static int
v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
	     void **container, size_t size, void (*free)(struct kref *ref),
	     u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue)
{
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct v3d_job *job;
	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
	int ret, i;

	*container = kcalloc(1, size, GFP_KERNEL);
	if (!*container) {
		DRM_ERROR("Cannot allocate memory for v3d job.");
		return -ENOMEM;
	}

	job = *container;
	job->v3d = v3d;
	job->free = free;

	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
				 v3d_priv);
	if (ret)
		goto fail;

	if (has_multisync) {
		if (se->in_sync_count && se->wait_stage == queue) {
			struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs);

			for (i = 0; i < se->in_sync_count; i++) {
				struct drm_v3d_sem in;

				if (copy_from_user(&in, handle++, sizeof(in))) {
					ret = -EFAULT;
					DRM_DEBUG("Failed to copy wait dep handle.\n");
					goto fail_deps;
				}
				ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in.handle, 0);

				// TODO: Investigate why this was filtered out for the IOCTL.
				if (ret && ret != -ENOENT)
					goto fail_deps;
			}
		}
	} else {
		ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in_sync, 0);

		// TODO: Investigate why this was filtered out for the IOCTL.
		if (ret && ret != -ENOENT)
			goto fail_deps;
	}

	kref_init(&job->refcount);

	return 0;

fail_deps:
	drm_sched_job_cleanup(&job->base);
fail:
	kfree(*container);
	*container = NULL;

	return ret;
}

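/* Arms the scheduler job (which makes done_fence valid) and queues it
 * to run.  An extra job reference is taken here and dropped by the
 * scheduler when the job completes.
 */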
static void
v3d_push_job(struct v3d_job *job)
{
	drm_sched_job_arm(&job->base);

	job->done_fence = dma_fence_get(&job->base.s_fence->finished);

	/* put by scheduler job completion */
	kref_get(&job->refcount);

	drm_sched_entity_push_job(&job->base);
}

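/* Attaches the job's done fence to every BO's reservation as a write
 * fence, drops the reservation locks, and installs the fence in the
 * output syncobjs: either the single out_sync handle or all of the
 * multisync extension's out_syncs.
 */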
static void
v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
					 struct v3d_job *job,
					 struct ww_acquire_ctx *acquire_ctx,
					 u32 out_sync,
					 struct v3d_submit_ext *se,
					 struct dma_fence *done_fence)
{
	struct drm_syncobj *sync_out;
	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
	int i;

	for (i = 0; i < job->bo_count; i++) {
		/* XXX: Use shared fences for read-only objects. */
		dma_resv_add_fence(job->bo[i]->resv, job->done_fence,
				   DMA_RESV_USAGE_WRITE);
	}

	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);

	/* Update the return sync object for the job. */
	/* If the job only carries a single signal semaphore: */
	if (!has_multisync) {
		sync_out = drm_syncobj_find(file_priv, out_sync);
		if (sync_out) {
			drm_syncobj_replace_fence(sync_out, done_fence);
			drm_syncobj_put(sync_out);
		}
		return;
	}

	/* Otherwise, signal all of the multisync extension's syncobjs. */
	if (se->out_sync_count) {
		for (i = 0; i < se->out_sync_count; i++) {
			drm_syncobj_replace_fence(se->out_syncs[i].syncobj,
						  done_fence);
			drm_syncobj_put(se->out_syncs[i].syncobj);
		}
		kvfree(se->out_syncs);
	}
}

static void
v3d_put_multisync_post_deps(struct v3d_submit_ext *se)
{
	unsigned int i;

	if (!(se && se->out_sync_count))
		return;

	for (i = 0; i < se->out_sync_count; i++)
		drm_syncobj_put(se->out_syncs[i].syncobj);
	kvfree(se->out_syncs);
}

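/* Copies the user's array of post-dependency syncobj handles and
 * resolves each one to a drm_syncobj, dropping any references already
 * taken if a lookup fails partway through.
 */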
static int
v3d_get_multisync_post_deps(struct drm_file *file_priv,
			    struct v3d_submit_ext *se,
			    u32 count, u64 handles)
{
	struct drm_v3d_sem __user *post_deps;
	int i, ret;

	if (!count)
		return 0;

	se->out_syncs = (struct v3d_submit_outsync *)
			kvmalloc_array(count,
				       sizeof(struct v3d_submit_outsync),
				       GFP_KERNEL);
	if (!se->out_syncs)
		return -ENOMEM;

	post_deps = u64_to_user_ptr(handles);

	for (i = 0; i < count; i++) {
		struct drm_v3d_sem out;

		if (copy_from_user(&out, post_deps++, sizeof(out))) {
			ret = -EFAULT;
			DRM_DEBUG("Failed to copy post dep handles\n");
			goto fail;
		}

		se->out_syncs[i].syncobj = drm_syncobj_find(file_priv,
							    out.handle);
		if (!se->out_syncs[i].syncobj) {
			ret = -EINVAL;
			goto fail;
		}
	}
	se->out_sync_count = count;

	return 0;

fail:
	for (i--; i >= 0; i--)
		drm_syncobj_put(se->out_syncs[i].syncobj);
	kvfree(se->out_syncs);

	return ret;
}

/* Gets the data for the multiple-binary-semaphore synchronization
 * extension: the syncobjs to wait on before running and the syncobjs
 * to be signaled when the job completes (out_syncs).
 */
static int
v3d_get_multisync_submit_deps(struct drm_file *file_priv,
			      struct drm_v3d_extension __user *ext,
			      void *data)
{
	struct drm_v3d_multi_sync multisync;
	struct v3d_submit_ext *se = data;
	int ret;

	if (copy_from_user(&multisync, ext, sizeof(multisync)))
		return -EFAULT;

	if (multisync.pad)
		return -EINVAL;

	ret = v3d_get_multisync_post_deps(file_priv, data, multisync.out_sync_count,
					  multisync.out_syncs);
	if (ret)
		return ret;

	se->in_sync_count = multisync.in_sync_count;
	se->in_syncs = multisync.in_syncs;
	se->flags |= DRM_V3D_EXT_ID_MULTI_SYNC;
	se->wait_stage = multisync.wait_stage;

	return 0;
}

/* When userspace attaches ioctl extensions, v3d_get_extensions() walks
 * the extension chain and parses each entry according to its id (name).
 */
static int
v3d_get_extensions(struct drm_file *file_priv,
		   u64 ext_handles,
		   void *data)
{
	struct drm_v3d_extension __user *user_ext;
	int ret;

	user_ext = u64_to_user_ptr(ext_handles);
	while (user_ext) {
		struct drm_v3d_extension ext;

		if (copy_from_user(&ext, user_ext, sizeof(ext))) {
			DRM_DEBUG("Failed to copy submit extension\n");
			return -EFAULT;
		}

		switch (ext.id) {
		case DRM_V3D_EXT_ID_MULTI_SYNC:
			ret = v3d_get_multisync_submit_deps(file_priv, user_ext, data);
			if (ret)
				return ret;
			break;
		default:
			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
			return -EINVAL;
		}

		user_ext = u64_to_user_ptr(ext.next);
	}

	return 0;
}

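/* For reference, a minimal userspace submission might look like the
 * sketch below.  This is a hedged example, not part of the driver: it
 * assumes the uapi structs from include/uapi/drm/v3d_drm.h, and that
 * the caller has already built the CLs and the BO handle array.
 *
 *	struct drm_v3d_submit_cl args = {
 *		.bcl_start = bcl_start, .bcl_end = bcl_end,
 *		.rcl_start = rcl_start, .rcl_end = rcl_end,
 *		.bo_handles = (uintptr_t)bo_handles,
 *		.bo_handle_count = bo_handle_count,
 *		.out_sync = out_syncobj,
 *	};
 *	ret = ioctl(fd, DRM_IOCTL_V3D_SUBMIT_CL, &args);
 */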
/**
 * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * This is the main entrypoint for userspace to submit a 3D frame to
 * the GPU.  Userspace provides the binner command list (if
 * applicable), and the kernel sets up the render command list to draw
 * to the framebuffer described in the ioctl, using the command lists
 * that the 3D engine's binner will produce.
 */
int
v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_cl *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_bin_job *bin = NULL;
	struct v3d_render_job *render = NULL;
	struct v3d_job *clean_job = NULL;
	struct v3d_job *last_job;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);

	if (args->pad)
		return -EINVAL;

	if (args->flags &&
	    args->flags & ~(DRM_V3D_SUBMIT_CL_FLUSH_CACHE |
			    DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_init(v3d, file_priv, (void *)&render, sizeof(*render),
			   v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER);
	if (ret)
		goto fail;

	render->start = args->rcl_start;
	render->end = args->rcl_end;
	INIT_LIST_HEAD(&render->unref_list);

	if (args->bcl_start != args->bcl_end) {
		ret = v3d_job_init(v3d, file_priv, (void *)&bin, sizeof(*bin),
				   v3d_job_free, args->in_sync_bcl, &se, V3D_BIN);
		if (ret)
			goto fail;

		bin->start = args->bcl_start;
		bin->end = args->bcl_end;
		bin->qma = args->qma;
		bin->qms = args->qms;
		bin->qts = args->qts;
		bin->render = render;
	}

	if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
		ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job),
				   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
		if (ret)
			goto fail;

		last_job = clean_job;
	} else {
		last_job = &render->base;
	}

	ret = v3d_lookup_bos(dev, file_priv, last_job,
			     args->bo_handles, args->bo_handle_count);
	if (ret)
		goto fail;

	ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
	if (ret)
		goto fail;

	if (args->perfmon_id) {
		render->base.perfmon = v3d_perfmon_find(v3d_priv,
							args->perfmon_id);

		if (!render->base.perfmon) {
			ret = -ENOENT;
			goto fail_perfmon;
		}
	}

	mutex_lock(&v3d->sched_lock);
	if (bin) {
		bin->base.perfmon = render->base.perfmon;
		v3d_perfmon_get(bin->base.perfmon);
		v3d_push_job(&bin->base);

		ret = drm_sched_job_add_dependency(&render->base.base,
						   dma_fence_get(bin->base.done_fence));
		if (ret)
			goto fail_unreserve;
	}

	v3d_push_job(&render->base);

	if (clean_job) {
		struct dma_fence *render_fence =
			dma_fence_get(render->base.done_fence);
		ret = drm_sched_job_add_dependency(&clean_job->base,
						   render_fence);
		if (ret)
			goto fail_unreserve;
		clean_job->perfmon = render->base.perfmon;
		v3d_perfmon_get(clean_job->perfmon);
		v3d_push_job(clean_job);
	}

	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 last_job,
						 &acquire_ctx,
						 args->out_sync,
						 &se,
						 last_job->done_fence);

	if (bin)
		v3d_job_put(&bin->base);
	v3d_job_put(&render->base);
	if (clean_job)
		v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
fail_perfmon:
	drm_gem_unlock_reservations(last_job->bo,
				    last_job->bo_count, &acquire_ctx);
fail:
	v3d_job_cleanup((void *)bin);
	v3d_job_cleanup((void *)render);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

/**
 * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the TFU, which we don't
 * need to validate since the TFU is behind the MMU.
 */
int
v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct drm_v3d_submit_tfu *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_tfu_job *job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_DEBUG("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job),
			   v3d_job_free, args->in_sync, &se, V3D_TFU);
	if (ret)
		goto fail;

	job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
			       sizeof(*job->base.bo), GFP_KERNEL);
	if (!job->base.bo) {
		ret = -ENOMEM;
		goto fail;
	}

	job->args = *args;

	for (job->base.bo_count = 0;
	     job->base.bo_count < ARRAY_SIZE(args->bo_handles);
	     job->base.bo_count++) {
		struct drm_gem_object *bo;

		if (!args->bo_handles[job->base.bo_count])
			break;

		bo = drm_gem_object_lookup(file_priv, args->bo_handles[job->base.bo_count]);
		if (!bo) {
			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
				  job->base.bo_count,
				  args->bo_handles[job->base.bo_count]);
			ret = -ENOENT;
			goto fail;
		}
		job->base.bo[job->base.bo_count] = bo;
	}

	ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
	if (ret)
		goto fail;

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&job->base);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 &job->base, &acquire_ctx,
						 args->out_sync,
						 &se,
						 job->base.done_fence);

	v3d_job_put(&job->base);

	return 0;

fail:
	v3d_job_cleanup((void *)job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

/**
 * v3d_submit_csd_ioctl() - Submits a CSD (compute shader dispatch) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the CSD, which we don't
 * need to validate since the CSD is behind the MMU.
 */
int
v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_csd *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_csd_job *job = NULL;
	struct v3d_job *clean_job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret;

	trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]);

	if (args->pad)
		return -EINVAL;

	if (!v3d_has_csd(v3d)) {
		DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n");
		return -EINVAL;
	}

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job),
			   v3d_job_free, args->in_sync, &se, V3D_CSD);
	if (ret)
		goto fail;

	ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job),
			   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
	if (ret)
		goto fail;

	job->args = *args;

	ret = v3d_lookup_bos(dev, file_priv, clean_job,
			     args->bo_handles, args->bo_handle_count);
	if (ret)
		goto fail;

	ret = v3d_lock_bo_reservations(clean_job, &acquire_ctx);
	if (ret)
		goto fail;

	if (args->perfmon_id) {
		job->base.perfmon = v3d_perfmon_find(v3d_priv,
						     args->perfmon_id);
		if (!job->base.perfmon) {
			ret = -ENOENT;
			goto fail_perfmon;
		}
	}

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&job->base);

	ret = drm_sched_job_add_dependency(&clean_job->base,
					   dma_fence_get(job->base.done_fence));
	if (ret)
		goto fail_unreserve;

	v3d_push_job(clean_job);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 clean_job,
						 &acquire_ctx,
						 args->out_sync,
						 &se,
						 clean_job->done_fence);

	v3d_job_put(&job->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
fail_perfmon:
	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
				    &acquire_ctx);
fail:
	v3d_job_cleanup((void *)job);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

int
v3d_gem_init(struct drm_device *dev)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
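	/* 4MB of page table holds one u32 entry per 4KB page, which
	 * covers the MMU's full 4GB address space.
	 */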
	u32 pt_size = 4096 * 1024;
	int ret, i;

	for (i = 0; i < V3D_MAX_QUEUES; i++)
		v3d->queue[i].fence_context = dma_fence_context_alloc(1);

	spin_lock_init(&v3d->mm_lock);
	spin_lock_init(&v3d->job_lock);
	ret = drmm_mutex_init(dev, &v3d->bo_lock);
	if (ret)
		return ret;
	ret = drmm_mutex_init(dev, &v3d->reset_lock);
	if (ret)
		return ret;
	ret = drmm_mutex_init(dev, &v3d->sched_lock);
	if (ret)
		return ret;
	ret = drmm_mutex_init(dev, &v3d->cache_clean_lock);
	if (ret)
		return ret;

	/* Note: We don't allocate address 0.  Various bits of HW
	 * treat 0 as special, such as the occlusion query counters
	 * where 0 means "disabled".
	 */
	drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1);

	v3d->pt = dma_alloc_wc(v3d->drm.dev, pt_size,
			       &v3d->pt_paddr,
			       GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
	if (!v3d->pt) {
		drm_mm_takedown(&v3d->mm);
		dev_err(v3d->drm.dev,
			"Failed to allocate page tables. Please ensure you have DMA enabled.\n");
		return -ENOMEM;
	}

	v3d_init_hw_state(v3d);
	v3d_mmu_set_page_table(v3d);

	ret = v3d_sched_init(v3d);
	if (ret) {
		drm_mm_takedown(&v3d->mm);
		dma_free_coherent(v3d->drm.dev, pt_size, (void *)v3d->pt,
				  v3d->pt_paddr);
		return ret;
	}

	return 0;
}

void
v3d_gem_destroy(struct drm_device *dev)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);

	v3d_sched_fini(v3d);

	/* Waiting for jobs to finish would need to be done before
	 * unregistering V3D.
	 */
	WARN_ON(v3d->bin_job);
	WARN_ON(v3d->render_job);

	drm_mm_takedown(&v3d->mm);

	dma_free_coherent(v3d->drm.dev, 4096 * 1024, (void *)v3d->pt,
			  v3d->pt_paddr);
}