1 | /* |
2 | * Copyright © 2014 Broadcom |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice (including the next |
12 | * paragraph) shall be included in all copies or substantial portions of the |
13 | * Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
21 | * IN THE SOFTWARE. |
22 | */ |
23 | |
24 | #include <linux/module.h> |
25 | #include <linux/platform_device.h> |
26 | #include <linux/pm_runtime.h> |
27 | #include <linux/device.h> |
28 | #include <linux/io.h> |
29 | #include <linux/sched/signal.h> |
30 | #include <linux/dma-fence-array.h> |
31 | |
32 | #include <drm/drm_syncobj.h> |
33 | |
34 | #include "uapi/drm/vc4_drm.h" |
35 | #include "vc4_drv.h" |
36 | #include "vc4_regs.h" |
37 | #include "vc4_trace.h" |
38 | |
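/* Re-arm the hangcheck timer so that GPU progress is checked again
 * roughly 100ms from now.
 */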
39 | static void |
40 | vc4_queue_hangcheck(struct drm_device *dev) |
41 | { |
42 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
43 | |
	mod_timer(&vc4->hangcheck.timer,
		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
46 | } |
47 | |
48 | struct vc4_hang_state { |
49 | struct drm_vc4_get_hang_state user_state; |
50 | |
51 | u32 bo_count; |
52 | struct drm_gem_object **bo; |
53 | }; |
54 | |
55 | static void |
56 | vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state) |
57 | { |
58 | unsigned int i; |
59 | |
60 | for (i = 0; i < state->user_state.bo_count; i++) |
		drm_gem_object_put(state->bo[i]);

	kfree(state);
64 | } |
65 | |
66 | int |
67 | vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, |
68 | struct drm_file *file_priv) |
69 | { |
70 | struct drm_vc4_get_hang_state *get_state = data; |
71 | struct drm_vc4_get_hang_state_bo *bo_state; |
72 | struct vc4_hang_state *kernel_state; |
73 | struct drm_vc4_get_hang_state *state; |
74 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
75 | unsigned long irqflags; |
76 | u32 i; |
77 | int ret = 0; |
78 | |
79 | if (WARN_ON_ONCE(vc4->is_vc5)) |
80 | return -ENODEV; |
81 | |
82 | if (!vc4->v3d) { |
		DRM_DEBUG("VC4_GET_HANG_STATE with no VC4 V3D probed\n");
84 | return -ENODEV; |
85 | } |
86 | |
87 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
88 | kernel_state = vc4->hang_state; |
89 | if (!kernel_state) { |
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
91 | return -ENOENT; |
92 | } |
93 | state = &kernel_state->user_state; |
94 | |
95 | /* If the user's array isn't big enough, just return the |
96 | * required array size. |
97 | */ |
98 | if (get_state->bo_count < state->bo_count) { |
99 | get_state->bo_count = state->bo_count; |
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
101 | return 0; |
102 | } |
103 | |
104 | vc4->hang_state = NULL; |
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
106 | |
107 | /* Save the user's BO pointer, so we don't stomp it with the memcpy. */ |
108 | state->bo = get_state->bo; |
109 | memcpy(get_state, state, sizeof(*state)); |
110 | |
	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
112 | if (!bo_state) { |
113 | ret = -ENOMEM; |
114 | goto err_free; |
115 | } |
116 | |
117 | for (i = 0; i < state->bo_count; i++) { |
118 | struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]); |
119 | u32 handle; |
120 | |
		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
					    &handle);
123 | |
124 | if (ret) { |
125 | state->bo_count = i; |
126 | goto err_delete_handle; |
127 | } |
128 | bo_state[i].handle = handle; |
129 | bo_state[i].paddr = vc4_bo->base.dma_addr; |
130 | bo_state[i].size = vc4_bo->base.base.size; |
131 | } |
132 | |
133 | if (copy_to_user(u64_to_user_ptr(get_state->bo), |
			 bo_state,
			 state->bo_count * sizeof(*bo_state)))
136 | ret = -EFAULT; |
137 | |
138 | err_delete_handle: |
139 | if (ret) { |
140 | for (i = 0; i < state->bo_count; i++) |
			drm_gem_handle_delete(file_priv, bo_state[i].handle);
142 | } |
143 | |
144 | err_free: |
	vc4_free_hang_state(dev, kernel_state);
	kfree(bo_state);
147 | |
148 | return ret; |
149 | } |
150 | |
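/* Snapshot the V3D register state and take references on the BOs of the
 * currently running bin and render jobs, so that userspace can retrieve
 * them later through the VC4_GET_HANG_STATE ioctl.
 */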
151 | static void |
152 | vc4_save_hang_state(struct drm_device *dev) |
153 | { |
154 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
155 | struct drm_vc4_get_hang_state *state; |
156 | struct vc4_hang_state *kernel_state; |
157 | struct vc4_exec_info *exec[2]; |
158 | struct vc4_bo *bo; |
159 | unsigned long irqflags; |
160 | unsigned int i, j, k, unref_list_count; |
161 | |
	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
163 | if (!kernel_state) |
164 | return; |
165 | |
166 | state = &kernel_state->user_state; |
167 | |
168 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
169 | exec[0] = vc4_first_bin_job(vc4); |
170 | exec[1] = vc4_first_render_job(vc4); |
171 | if (!exec[0] && !exec[1]) { |
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
173 | return; |
174 | } |
175 | |
176 | /* Get the bos from both binner and renderer into hang state. */ |
177 | state->bo_count = 0; |
178 | for (i = 0; i < 2; i++) { |
179 | if (!exec[i]) |
180 | continue; |
181 | |
182 | unref_list_count = 0; |
183 | list_for_each_entry(bo, &exec[i]->unref_list, unref_head) |
184 | unref_list_count++; |
185 | state->bo_count += exec[i]->bo_count + unref_list_count; |
186 | } |
187 | |
	kernel_state->bo = kcalloc(state->bo_count,
				   sizeof(*kernel_state->bo), GFP_ATOMIC);
190 | |
191 | if (!kernel_state->bo) { |
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
193 | return; |
194 | } |
195 | |
196 | k = 0; |
197 | for (i = 0; i < 2; i++) { |
198 | if (!exec[i]) |
199 | continue; |
200 | |
201 | for (j = 0; j < exec[i]->bo_count; j++) { |
202 | bo = to_vc4_bo(exec[i]->bo[j]); |
203 | |
204 | /* Retain BOs just in case they were marked purgeable. |
205 | * This prevents the BO from being purged before |
206 | * someone had a chance to dump the hang state. |
207 | */ |
208 | WARN_ON(!refcount_read(&bo->usecnt)); |
			refcount_inc(&bo->usecnt);
			drm_gem_object_get(exec[i]->bo[j]);
211 | kernel_state->bo[k++] = exec[i]->bo[j]; |
212 | } |
213 | |
214 | list_for_each_entry(bo, &exec[i]->unref_list, unref_head) { |
215 | /* No need to retain BOs coming from the ->unref_list |
216 | * because they are naturally unpurgeable. |
217 | */ |
			drm_gem_object_get(&bo->base.base);
219 | kernel_state->bo[k++] = &bo->base.base; |
220 | } |
221 | } |
222 | |
223 | WARN_ON_ONCE(k != state->bo_count); |
224 | |
225 | if (exec[0]) |
226 | state->start_bin = exec[0]->ct0ca; |
227 | if (exec[1]) |
228 | state->start_render = exec[1]->ct1ca; |
229 | |
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
231 | |
232 | state->ct0ca = V3D_READ(V3D_CTNCA(0)); |
233 | state->ct0ea = V3D_READ(V3D_CTNEA(0)); |
234 | |
235 | state->ct1ca = V3D_READ(V3D_CTNCA(1)); |
236 | state->ct1ea = V3D_READ(V3D_CTNEA(1)); |
237 | |
238 | state->ct0cs = V3D_READ(V3D_CTNCS(0)); |
239 | state->ct1cs = V3D_READ(V3D_CTNCS(1)); |
240 | |
241 | state->ct0ra0 = V3D_READ(V3D_CT00RA0); |
242 | state->ct1ra0 = V3D_READ(V3D_CT01RA0); |
243 | |
244 | state->bpca = V3D_READ(V3D_BPCA); |
245 | state->bpcs = V3D_READ(V3D_BPCS); |
246 | state->bpoa = V3D_READ(V3D_BPOA); |
247 | state->bpos = V3D_READ(V3D_BPOS); |
248 | |
249 | state->vpmbase = V3D_READ(V3D_VPMBASE); |
250 | |
251 | state->dbge = V3D_READ(V3D_DBGE); |
252 | state->fdbgo = V3D_READ(V3D_FDBGO); |
253 | state->fdbgb = V3D_READ(V3D_FDBGB); |
254 | state->fdbgr = V3D_READ(V3D_FDBGR); |
255 | state->fdbgs = V3D_READ(V3D_FDBGS); |
256 | state->errstat = V3D_READ(V3D_ERRSTAT); |
257 | |
258 | /* We need to turn purgeable BOs into unpurgeable ones so that |
259 | * userspace has a chance to dump the hang state before the kernel |
260 | * decides to purge those BOs. |
261 | * Note that BO consistency at dump time cannot be guaranteed. For |
262 | * example, if the owner of these BOs decides to re-use them or mark |
263 | * them purgeable again there's nothing we can do to prevent it. |
264 | */ |
265 | for (i = 0; i < kernel_state->user_state.bo_count; i++) { |
266 | struct vc4_bo *bo = to_vc4_bo(kernel_state->bo[i]); |
267 | |
268 | if (bo->madv == __VC4_MADV_NOTSUPP) |
269 | continue; |
270 | |
271 | mutex_lock(&bo->madv_lock); |
272 | if (!WARN_ON(bo->madv == __VC4_MADV_PURGED)) |
273 | bo->madv = VC4_MADV_WILLNEED; |
		refcount_dec(&bo->usecnt);
		mutex_unlock(&bo->madv_lock);
276 | } |
277 | |
278 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
279 | if (vc4->hang_state) { |
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_free_hang_state(dev, kernel_state);
282 | } else { |
283 | vc4->hang_state = kernel_state; |
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
285 | } |
286 | } |
287 | |
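/* Perform a full GPU reset by power cycling the V3D block through runtime
 * PM, then reset the interrupt state and re-arm the hangcheck timer.
 */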
288 | static void |
289 | vc4_reset(struct drm_device *dev) |
290 | { |
291 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
292 | |
	DRM_INFO("Resetting GPU.\n");
294 | |
295 | mutex_lock(&vc4->power_lock); |
296 | if (vc4->power_refcount) { |
		/* Power the device off and back on by dropping the
		 * reference on runtime PM.
		 */
		pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
		pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);
304 | |
305 | vc4_irq_reset(dev); |
306 | |
307 | /* Rearm the hangcheck -- another job might have been waiting |
308 | * for our hung one to get kicked off, and vc4_irq_reset() |
309 | * would have started it. |
310 | */ |
311 | vc4_queue_hangcheck(dev); |
312 | } |
313 | |
314 | static void |
315 | vc4_reset_work(struct work_struct *work) |
316 | { |
317 | struct vc4_dev *vc4 = |
318 | container_of(work, struct vc4_dev, hangcheck.reset_work); |
319 | |
	vc4_save_hang_state(&vc4->base);

	vc4_reset(&vc4->base);
323 | } |
324 | |
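/* Timer callback for the hangcheck: if neither control list has advanced
 * since the last check, assume the GPU is hung and schedule the reset
 * worker (resetting can sleep, so it cannot be done from timer context).
 */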
325 | static void |
326 | vc4_hangcheck_elapsed(struct timer_list *t) |
327 | { |
328 | struct vc4_dev *vc4 = from_timer(vc4, t, hangcheck.timer); |
329 | struct drm_device *dev = &vc4->base; |
330 | uint32_t ct0ca, ct1ca; |
331 | unsigned long irqflags; |
332 | struct vc4_exec_info *bin_exec, *render_exec; |
333 | |
334 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
335 | |
336 | bin_exec = vc4_first_bin_job(vc4); |
337 | render_exec = vc4_first_render_job(vc4); |
338 | |
339 | /* If idle, we can stop watching for hangs. */ |
340 | if (!bin_exec && !render_exec) { |
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
342 | return; |
343 | } |
344 | |
345 | ct0ca = V3D_READ(V3D_CTNCA(0)); |
346 | ct1ca = V3D_READ(V3D_CTNCA(1)); |
347 | |
348 | /* If we've made any progress in execution, rearm the timer |
349 | * and wait. |
350 | */ |
351 | if ((bin_exec && ct0ca != bin_exec->last_ct0ca) || |
352 | (render_exec && ct1ca != render_exec->last_ct1ca)) { |
353 | if (bin_exec) |
354 | bin_exec->last_ct0ca = ct0ca; |
355 | if (render_exec) |
356 | render_exec->last_ct1ca = ct1ca; |
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
358 | vc4_queue_hangcheck(dev); |
359 | return; |
360 | } |
361 | |
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
363 | |
364 | /* We've gone too long with no progress, reset. This has to |
365 | * be done from a work struct, since resetting can sleep and |
366 | * this timer hook isn't allowed to. |
367 | */ |
	schedule_work(&vc4->hangcheck.reset_work);
369 | } |
370 | |
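/* Kicks off a control list on one of the two V3D threads (0 = binner,
 * 1 = renderer).
 */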
371 | static void |
372 | submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end) |
373 | { |
374 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
375 | |
376 | /* Set the current and end address of the control list. |
377 | * Writing the end register is what starts the job. |
378 | */ |
379 | V3D_WRITE(V3D_CTNCA(thread), start); |
380 | V3D_WRITE(V3D_CTNEA(thread), end); |
381 | } |
382 | |
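/* Waits for the given seqno to be signaled as finished, up to timeout_ns
 * nanoseconds (~0ull means wait forever).  Returns 0 on success, -ETIME on
 * timeout, or -ERESTARTSYS if an interruptible wait was interrupted.
 */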
383 | int |
384 | vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns, |
385 | bool interruptible) |
386 | { |
387 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
388 | int ret = 0; |
389 | unsigned long timeout_expire; |
390 | DEFINE_WAIT(wait); |
391 | |
392 | if (WARN_ON_ONCE(vc4->is_vc5)) |
393 | return -ENODEV; |
394 | |
395 | if (vc4->finished_seqno >= seqno) |
396 | return 0; |
397 | |
398 | if (timeout_ns == 0) |
399 | return -ETIME; |
400 | |
	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);

	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
	for (;;) {
		prepare_to_wait(&vc4->job_wait_queue, &wait,
				interruptible ? TASK_INTERRUPTIBLE :
				TASK_UNINTERRUPTIBLE);
408 | |
409 | if (interruptible && signal_pending(current)) { |
410 | ret = -ERESTARTSYS; |
411 | break; |
412 | } |
413 | |
414 | if (vc4->finished_seqno >= seqno) |
415 | break; |
416 | |
417 | if (timeout_ns != ~0ull) { |
418 | if (time_after_eq(jiffies, timeout_expire)) { |
419 | ret = -ETIME; |
420 | break; |
421 | } |
			schedule_timeout(timeout_expire - jiffies);
423 | } else { |
424 | schedule(); |
425 | } |
426 | } |
427 | |
	finish_wait(&vc4->job_wait_queue, &wait);
	trace_vc4_wait_for_seqno_end(dev, seqno);
430 | |
431 | return ret; |
432 | } |
433 | |
434 | static void |
435 | vc4_flush_caches(struct drm_device *dev) |
436 | { |
437 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
438 | |
439 | /* Flush the GPU L2 caches. These caches sit on top of system |
440 | * L3 (the 128kb or so shared with the CPU), and are |
441 | * non-allocating in the L3. |
442 | */ |
443 | V3D_WRITE(V3D_L2CACTL, |
444 | V3D_L2CACTL_L2CCLR); |
445 | |
446 | V3D_WRITE(V3D_SLCACTL, |
447 | VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) | |
448 | VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) | |
449 | VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) | |
450 | VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC)); |
451 | } |
452 | |
453 | static void |
454 | vc4_flush_texture_caches(struct drm_device *dev) |
455 | { |
456 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
457 | |
458 | V3D_WRITE(V3D_L2CACTL, |
459 | V3D_L2CACTL_L2CCLR); |
460 | |
461 | V3D_WRITE(V3D_SLCACTL, |
462 | VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) | |
463 | VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC)); |
464 | } |
465 | |
/* Sets the registers for the next job to actually be executed in
467 | * the hardware. |
468 | * |
469 | * The job_lock should be held during this. |
470 | */ |
471 | void |
472 | vc4_submit_next_bin_job(struct drm_device *dev) |
473 | { |
474 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
475 | struct vc4_exec_info *exec; |
476 | |
477 | if (WARN_ON_ONCE(vc4->is_vc5)) |
478 | return; |
479 | |
480 | again: |
481 | exec = vc4_first_bin_job(vc4); |
482 | if (!exec) |
483 | return; |
484 | |
485 | vc4_flush_caches(dev); |
486 | |
487 | /* Only start the perfmon if it was not already started by a previous |
488 | * job. |
489 | */ |
490 | if (exec->perfmon && vc4->active_perfmon != exec->perfmon) |
		vc4_perfmon_start(vc4, exec->perfmon);
492 | |
493 | /* Either put the job in the binner if it uses the binner, or |
494 | * immediately move it to the to-be-rendered queue. |
495 | */ |
496 | if (exec->ct0ca != exec->ct0ea) { |
		trace_vc4_submit_cl(dev, false, exec->seqno, exec->ct0ca,
				    exec->ct0ea);
		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
500 | } else { |
501 | struct vc4_exec_info *next; |
502 | |
503 | vc4_move_job_to_render(dev, exec); |
504 | next = vc4_first_bin_job(vc4); |
505 | |
506 | /* We can't start the next bin job if the previous job had a |
507 | * different perfmon instance attached to it. The same goes |
508 | * if one of them had a perfmon attached to it and the other |
509 | * one doesn't. |
510 | */ |
511 | if (next && next->perfmon == exec->perfmon) |
512 | goto again; |
513 | } |
514 | } |
515 | |
516 | void |
517 | vc4_submit_next_render_job(struct drm_device *dev) |
518 | { |
519 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
520 | struct vc4_exec_info *exec = vc4_first_render_job(vc4); |
521 | |
522 | if (!exec) |
523 | return; |
524 | |
525 | if (WARN_ON_ONCE(vc4->is_vc5)) |
526 | return; |
527 | |
528 | /* A previous RCL may have written to one of our textures, and |
529 | * our full cache flush at bin time may have occurred before |
530 | * that RCL completed. Flush the texture cache now, but not |
531 | * the instructions or uniforms (since we don't write those |
532 | * from an RCL). |
533 | */ |
534 | vc4_flush_texture_caches(dev); |
535 | |
	trace_vc4_submit_cl(dev, true, exec->seqno, exec->ct1ca, exec->ct1ea);
	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
538 | } |
539 | |
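/* Moves a job from the binner queue to the render queue, and kicks the
 * render engine if it was idle.  Called with job_lock held.
 */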
540 | void |
541 | vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec) |
542 | { |
543 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
	bool was_empty = list_empty(&vc4->render_job_list);
545 | |
546 | if (WARN_ON_ONCE(vc4->is_vc5)) |
547 | return; |
548 | |
	list_move_tail(&exec->head, &vc4->render_job_list);
550 | if (was_empty) |
551 | vc4_submit_next_render_job(dev); |
552 | } |
553 | |
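/* Attaches the job's fence to the reservation object of every BO it
 * references (as a read fence, or as a write fence for RCL write BOs) and
 * records the job's seqno in each BO for the wait-BO ioctl.
 */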
554 | static void |
555 | vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) |
556 | { |
557 | struct vc4_bo *bo; |
558 | unsigned i; |
559 | |
560 | for (i = 0; i < exec->bo_count; i++) { |
561 | bo = to_vc4_bo(exec->bo[i]); |
562 | bo->seqno = seqno; |
563 | |
		dma_resv_add_fence(bo->base.base.resv, exec->fence,
				   DMA_RESV_USAGE_READ);
566 | } |
567 | |
568 | list_for_each_entry(bo, &exec->unref_list, unref_head) { |
569 | bo->seqno = seqno; |
570 | } |
571 | |
572 | for (i = 0; i < exec->rcl_write_bo_count; i++) { |
573 | bo = to_vc4_bo(&exec->rcl_write_bo[i]->base); |
574 | bo->write_seqno = seqno; |
575 | |
		dma_resv_add_fence(bo->base.base.resv, exec->fence,
				   DMA_RESV_USAGE_WRITE);
578 | } |
579 | } |
580 | |
581 | static void |
582 | vc4_unlock_bo_reservations(struct drm_device *dev, |
583 | struct vc4_exec_info *exec, |
584 | struct ww_acquire_ctx *acquire_ctx) |
585 | { |
586 | int i; |
587 | |
588 | for (i = 0; i < exec->bo_count; i++) |
		dma_resv_unlock(exec->bo[i]->resv);

	ww_acquire_fini(acquire_ctx);
592 | } |
593 | |
594 | /* Takes the reservation lock on all the BOs being referenced, so that |
595 | * at queue submit time we can update the reservations. |
596 | * |
 * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
598 | * (all of which are on exec->unref_list). They're entirely private |
599 | * to vc4, so we don't attach dma-buf fences to them. |
600 | */ |
601 | static int |
602 | vc4_lock_bo_reservations(struct drm_device *dev, |
603 | struct vc4_exec_info *exec, |
604 | struct ww_acquire_ctx *acquire_ctx) |
605 | { |
606 | int contended_lock = -1; |
607 | int i, ret; |
608 | struct drm_gem_object *bo; |
609 | |
	ww_acquire_init(acquire_ctx, &reservation_ww_class);
611 | |
612 | retry: |
613 | if (contended_lock != -1) { |
614 | bo = exec->bo[contended_lock]; |
		ret = dma_resv_lock_slow_interruptible(bo->resv, acquire_ctx);
616 | if (ret) { |
			ww_acquire_done(acquire_ctx);
618 | return ret; |
619 | } |
620 | } |
621 | |
622 | for (i = 0; i < exec->bo_count; i++) { |
623 | if (i == contended_lock) |
624 | continue; |
625 | |
626 | bo = exec->bo[i]; |
627 | |
		ret = dma_resv_lock_interruptible(bo->resv, acquire_ctx);
629 | if (ret) { |
630 | int j; |
631 | |
632 | for (j = 0; j < i; j++) { |
633 | bo = exec->bo[j]; |
				dma_resv_unlock(bo->resv);
635 | } |
636 | |
637 | if (contended_lock != -1 && contended_lock >= i) { |
638 | bo = exec->bo[contended_lock]; |
639 | |
				dma_resv_unlock(bo->resv);
641 | } |
642 | |
643 | if (ret == -EDEADLK) { |
644 | contended_lock = i; |
645 | goto retry; |
646 | } |
647 | |
			ww_acquire_done(acquire_ctx);
649 | return ret; |
650 | } |
651 | } |
652 | |
	ww_acquire_done(acquire_ctx);
654 | |
655 | /* Reserve space for our shared (read-only) fence references, |
656 | * before we commit the CL to the hardware. |
657 | */ |
658 | for (i = 0; i < exec->bo_count; i++) { |
659 | bo = exec->bo[i]; |
660 | |
		ret = dma_resv_reserve_fences(bo->resv, 1);
662 | if (ret) { |
663 | vc4_unlock_bo_reservations(dev, exec, acquire_ctx); |
664 | return ret; |
665 | } |
666 | } |
667 | |
668 | return 0; |
669 | } |
670 | |
671 | /* Queues a struct vc4_exec_info for execution. If no job is |
672 | * currently executing, then submits it. |
673 | * |
674 | * Unlike most GPUs, our hardware only handles one command list at a |
675 | * time. To queue multiple jobs at once, we'd need to edit the |
676 | * previous command list to have a jump to the new one at the end, and |
677 | * then bump the end address. That's a change for a later date, |
678 | * though. |
679 | */ |
680 | static int |
681 | vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, |
682 | struct ww_acquire_ctx *acquire_ctx, |
683 | struct drm_syncobj *out_sync) |
684 | { |
685 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
686 | struct vc4_exec_info *renderjob; |
687 | uint64_t seqno; |
688 | unsigned long irqflags; |
689 | struct vc4_fence *fence; |
690 | |
	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
692 | if (!fence) |
693 | return -ENOMEM; |
694 | fence->dev = dev; |
695 | |
696 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
697 | |
698 | seqno = ++vc4->emit_seqno; |
699 | exec->seqno = seqno; |
700 | |
	dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock,
		       vc4->dma_fence_context, exec->seqno);
703 | fence->seqno = exec->seqno; |
704 | exec->fence = &fence->base; |
705 | |
706 | if (out_sync) |
		drm_syncobj_replace_fence(out_sync, exec->fence);
708 | |
709 | vc4_update_bo_seqnos(exec, seqno); |
710 | |
711 | vc4_unlock_bo_reservations(dev, exec, acquire_ctx); |
712 | |
	list_add_tail(&exec->head, &vc4->bin_job_list);
714 | |
715 | /* If no bin job was executing and if the render job (if any) has the |
716 | * same perfmon as our job attached to it (or if both jobs don't have |
717 | * perfmon activated), then kick ours off. Otherwise, it'll get |
718 | * started when the previous job's flush/render done interrupt occurs. |
719 | */ |
720 | renderjob = vc4_first_render_job(vc4); |
721 | if (vc4_first_bin_job(vc4) == exec && |
722 | (!renderjob || renderjob->perfmon == exec->perfmon)) { |
723 | vc4_submit_next_bin_job(dev); |
724 | vc4_queue_hangcheck(dev); |
725 | } |
726 | |
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
728 | |
729 | return 0; |
730 | } |
731 | |
732 | /** |
733 | * vc4_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects |
734 | * referenced by the job. |
735 | * @dev: DRM device |
736 | * @file_priv: DRM file for this fd |
737 | * @exec: V3D job being set up |
738 | * |
739 | * The command validator needs to reference BOs by their index within |
740 | * the submitted job's BO list. This does the validation of the job's |
741 | * BO list and reference counting for the lifetime of the job. |
742 | */ |
743 | static int |
744 | vc4_cl_lookup_bos(struct drm_device *dev, |
745 | struct drm_file *file_priv, |
746 | struct vc4_exec_info *exec) |
747 | { |
748 | struct drm_vc4_submit_cl *args = exec->args; |
749 | int ret = 0; |
750 | int i; |
751 | |
752 | exec->bo_count = args->bo_handle_count; |
753 | |
754 | if (!exec->bo_count) { |
755 | /* See comment on bo_index for why we have to check |
756 | * this. |
757 | */ |
		DRM_DEBUG("Rendering requires BOs to validate\n");
759 | return -EINVAL; |
760 | } |
761 | |
	ret = drm_gem_objects_lookup(file_priv, u64_to_user_ptr(args->bo_handles),
				     exec->bo_count, &exec->bo);
764 | |
765 | if (ret) |
766 | goto fail_put_bo; |
767 | |
768 | for (i = 0; i < exec->bo_count; i++) { |
769 | ret = vc4_bo_inc_usecnt(to_vc4_bo(exec->bo[i])); |
770 | if (ret) |
771 | goto fail_dec_usecnt; |
772 | } |
773 | |
774 | return 0; |
775 | |
776 | fail_dec_usecnt: |
777 | /* Decrease usecnt on acquired objects. |
778 | * We cannot rely on vc4_complete_exec() to release resources here, |
779 | * because vc4_complete_exec() has no information about which BO has |
780 | * had its ->usecnt incremented. |
781 | * To make things easier we just free everything explicitly and set |
782 | * exec->bo to NULL so that vc4_complete_exec() skips the 'BO release' |
783 | * step. |
784 | */ |
785 | for (i-- ; i >= 0; i--) |
786 | vc4_bo_dec_usecnt(to_vc4_bo(exec->bo[i])); |
787 | |
788 | fail_put_bo: |
789 | /* Release any reference to acquired objects. */ |
790 | for (i = 0; i < exec->bo_count && exec->bo[i]; i++) |
		drm_gem_object_put(exec->bo[i]);

	kvfree(exec->bo);
794 | exec->bo = NULL; |
795 | return ret; |
796 | } |
797 | |
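/* Copies in the binner command list, shader records and uniforms from
 * userspace, validates them, and stores the validated copies in a newly
 * allocated BO that the hardware will actually execute from.
 */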
798 | static int |
799 | vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) |
800 | { |
801 | struct drm_vc4_submit_cl *args = exec->args; |
802 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
803 | void *temp = NULL; |
804 | void *bin; |
805 | int ret = 0; |
806 | uint32_t bin_offset = 0; |
807 | uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size, |
808 | 16); |
809 | uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size; |
810 | uint32_t exec_size = uniforms_offset + args->uniforms_size; |
811 | uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) * |
812 | args->shader_rec_count); |
813 | struct vc4_bo *bo; |
814 | |
815 | if (shader_rec_offset < args->bin_cl_size || |
816 | uniforms_offset < shader_rec_offset || |
817 | exec_size < uniforms_offset || |
818 | args->shader_rec_count >= (UINT_MAX / |
819 | sizeof(struct vc4_shader_state)) || |
820 | temp_size < exec_size) { |
		DRM_DEBUG("overflow in exec arguments\n");
822 | ret = -EINVAL; |
823 | goto fail; |
824 | } |
825 | |
826 | /* Allocate space where we'll store the copied in user command lists |
827 | * and shader records. |
828 | * |
829 | * We don't just copy directly into the BOs because we need to |
830 | * read the contents back for validation, and I think the |
831 | * bo->vaddr is uncached access. |
832 | */ |
	temp = kvmalloc_array(temp_size, 1, GFP_KERNEL);
	if (!temp) {
		DRM_ERROR("Failed to allocate storage for copying "
			  "in bin/render CLs.\n");
837 | ret = -ENOMEM; |
838 | goto fail; |
839 | } |
840 | bin = temp + bin_offset; |
841 | exec->shader_rec_u = temp + shader_rec_offset; |
842 | exec->uniforms_u = temp + uniforms_offset; |
843 | exec->shader_state = temp + exec_size; |
844 | exec->shader_state_size = args->shader_rec_count; |
845 | |
	if (copy_from_user(bin,
			   u64_to_user_ptr(args->bin_cl),
			   args->bin_cl_size)) {
849 | ret = -EFAULT; |
850 | goto fail; |
851 | } |
852 | |
	if (copy_from_user(exec->shader_rec_u,
			   u64_to_user_ptr(args->shader_rec),
			   args->shader_rec_size)) {
856 | ret = -EFAULT; |
857 | goto fail; |
858 | } |
859 | |
	if (copy_from_user(exec->uniforms_u,
			   u64_to_user_ptr(args->uniforms),
			   args->uniforms_size)) {
863 | ret = -EFAULT; |
864 | goto fail; |
865 | } |
866 | |
	bo = vc4_bo_create(dev, exec_size, true, VC4_BO_TYPE_BCL);
	if (IS_ERR(bo)) {
		DRM_ERROR("Couldn't allocate BO for binning\n");
		ret = PTR_ERR(bo);
871 | goto fail; |
872 | } |
873 | exec->exec_bo = &bo->base; |
874 | |
	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
		      &exec->unref_list);
877 | |
878 | exec->ct0ca = exec->exec_bo->dma_addr + bin_offset; |
879 | |
880 | exec->bin_u = bin; |
881 | |
882 | exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset; |
883 | exec->shader_rec_p = exec->exec_bo->dma_addr + shader_rec_offset; |
884 | exec->shader_rec_size = args->shader_rec_size; |
885 | |
886 | exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset; |
887 | exec->uniforms_p = exec->exec_bo->dma_addr + uniforms_offset; |
888 | exec->uniforms_size = args->uniforms_size; |
889 | |
890 | ret = vc4_validate_bin_cl(dev, |
				  exec->exec_bo->vaddr + bin_offset,
				  bin,
893 | exec); |
894 | if (ret) |
895 | goto fail; |
896 | |
897 | ret = vc4_validate_shader_recs(dev, exec); |
898 | if (ret) |
899 | goto fail; |
900 | |
901 | if (exec->found_tile_binning_mode_config_packet) { |
		ret = vc4_v3d_bin_bo_get(vc4, &exec->bin_bo_used);
903 | if (ret) |
904 | goto fail; |
905 | } |
906 | |
907 | /* Block waiting on any previous rendering into the CS's VBO, |
908 | * IB, or textures, so that pixels are actually written by the |
909 | * time we try to read them. |
910 | */ |
	ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);

fail:
	kvfree(temp);
915 | return ret; |
916 | } |
917 | |
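/* Tears down an exec once the job is finished (or force-completed on GPU
 * reset): signals its fence if still pending, drops BO references and use
 * counts, returns its bin slots and binner BO reference, and releases the
 * power and perfmon references taken at submit time.
 */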
918 | static void |
919 | vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) |
920 | { |
921 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
922 | unsigned long irqflags; |
923 | unsigned i; |
924 | |
925 | /* If we got force-completed because of GPU reset rather than |
926 | * through our IRQ handler, signal the fence now. |
927 | */ |
928 | if (exec->fence) { |
		dma_fence_signal(exec->fence);
		dma_fence_put(exec->fence);
931 | } |
932 | |
933 | if (exec->bo) { |
934 | for (i = 0; i < exec->bo_count; i++) { |
935 | struct vc4_bo *bo = to_vc4_bo(exec->bo[i]); |
936 | |
937 | vc4_bo_dec_usecnt(bo); |
			drm_gem_object_put(exec->bo[i]);
		}
		kvfree(exec->bo);
941 | } |
942 | |
	while (!list_empty(&exec->unref_list)) {
		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
						     struct vc4_bo, unref_head);
		list_del(&bo->unref_head);
		drm_gem_object_put(&bo->base.base);
948 | } |
949 | |
950 | /* Free up the allocation of any bin slots we used. */ |
951 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
952 | vc4->bin_alloc_used &= ~exec->bin_slots; |
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
954 | |
955 | /* Release the reference on the binner BO if needed. */ |
956 | if (exec->bin_bo_used) |
957 | vc4_v3d_bin_bo_put(vc4); |
958 | |
959 | /* Release the reference we had on the perf monitor. */ |
	vc4_perfmon_put(exec->perfmon);

	vc4_v3d_pm_put(vc4);

	kfree(exec);
965 | } |
966 | |
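/* Completes every exec on job_done_list and runs any registered seqno
 * callbacks whose seqno has now been reached.
 */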
967 | void |
968 | vc4_job_handle_completed(struct vc4_dev *vc4) |
969 | { |
970 | unsigned long irqflags; |
971 | struct vc4_seqno_cb *cb, *cb_temp; |
972 | |
973 | if (WARN_ON_ONCE(vc4->is_vc5)) |
974 | return; |
975 | |
976 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
	while (!list_empty(&vc4->job_done_list)) {
		struct vc4_exec_info *exec =
			list_first_entry(&vc4->job_done_list,
					 struct vc4_exec_info, head);
		list_del(&exec->head);

		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_complete_exec(&vc4->base, exec);
985 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
986 | } |
987 | |
988 | list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) { |
989 | if (cb->seqno <= vc4->finished_seqno) { |
			list_del_init(&cb->work.entry);
			schedule_work(&cb->work);
992 | } |
993 | } |
994 | |
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
996 | } |
997 | |
998 | static void vc4_seqno_cb_work(struct work_struct *work) |
999 | { |
1000 | struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work); |
1001 | |
1002 | cb->func(cb); |
1003 | } |
1004 | |
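/* Registers a callback to be run (from a workqueue) once the given seqno
 * has finished; if it already has, the callback is scheduled immediately.
 */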
1005 | int vc4_queue_seqno_cb(struct drm_device *dev, |
1006 | struct vc4_seqno_cb *cb, uint64_t seqno, |
1007 | void (*func)(struct vc4_seqno_cb *cb)) |
1008 | { |
1009 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
1010 | unsigned long irqflags; |
1011 | |
1012 | if (WARN_ON_ONCE(vc4->is_vc5)) |
1013 | return -ENODEV; |
1014 | |
1015 | cb->func = func; |
1016 | INIT_WORK(&cb->work, vc4_seqno_cb_work); |
1017 | |
1018 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
1019 | if (seqno > vc4->finished_seqno) { |
1020 | cb->seqno = seqno; |
		list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
	} else {
		schedule_work(&cb->work);
	}
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
1026 | |
1027 | return 0; |
1028 | } |
1029 | |
1030 | /* Scheduled when any job has been completed, this walks the list of |
1031 | * jobs that had completed and unrefs their BOs and frees their exec |
1032 | * structs. |
1033 | */ |
1034 | static void |
1035 | vc4_job_done_work(struct work_struct *work) |
1036 | { |
1037 | struct vc4_dev *vc4 = |
1038 | container_of(work, struct vc4_dev, job_done_work); |
1039 | |
1040 | vc4_job_handle_completed(vc4); |
1041 | } |
1042 | |
1043 | static int |
1044 | vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev, |
1045 | uint64_t seqno, |
1046 | uint64_t *timeout_ns) |
1047 | { |
1048 | unsigned long start = jiffies; |
	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);
1050 | |
1051 | if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) { |
		uint64_t delta = jiffies_to_nsecs(jiffies - start);
1053 | |
1054 | if (*timeout_ns >= delta) |
1055 | *timeout_ns -= delta; |
1056 | } |
1057 | |
1058 | return ret; |
1059 | } |
1060 | |
1061 | int |
1062 | vc4_wait_seqno_ioctl(struct drm_device *dev, void *data, |
1063 | struct drm_file *file_priv) |
1064 | { |
1065 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
1066 | struct drm_vc4_wait_seqno *args = data; |
1067 | |
1068 | if (WARN_ON_ONCE(vc4->is_vc5)) |
1069 | return -ENODEV; |
1070 | |
	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
					       &args->timeout_ns);
1073 | } |
1074 | |
1075 | int |
1076 | vc4_wait_bo_ioctl(struct drm_device *dev, void *data, |
1077 | struct drm_file *file_priv) |
1078 | { |
1079 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
1080 | int ret; |
1081 | struct drm_vc4_wait_bo *args = data; |
1082 | struct drm_gem_object *gem_obj; |
1083 | struct vc4_bo *bo; |
1084 | |
1085 | if (WARN_ON_ONCE(vc4->is_vc5)) |
1086 | return -ENODEV; |
1087 | |
1088 | if (args->pad != 0) |
1089 | return -EINVAL; |
1090 | |
	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
1094 | return -EINVAL; |
1095 | } |
1096 | bo = to_vc4_bo(gem_obj); |
1097 | |
	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
					      &args->timeout_ns);

	drm_gem_object_put(gem_obj);
1102 | return ret; |
1103 | } |
1104 | |
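/* A rough, illustrative sketch of the userspace side of this ioctl (not
 * part of the driver; the local variable names are made up, error handling
 * and the render-target description fields are omitted, and the ioctl is
 * issued here e.g. through libdrm's drmIoctl()):
 *
 *	struct drm_vc4_submit_cl submit = { 0 };
 *
 *	submit.bin_cl = (uintptr_t)bin_cl;
 *	submit.bin_cl_size = bin_cl_size;
 *	submit.shader_rec = (uintptr_t)shader_recs;
 *	submit.shader_rec_size = shader_rec_size;
 *	submit.shader_rec_count = shader_rec_count;
 *	submit.uniforms = (uintptr_t)uniforms;
 *	submit.uniforms_size = uniforms_size;
 *	submit.bo_handles = (uintptr_t)bo_handles;
 *	submit.bo_handle_count = bo_handle_count;
 *
 *	drmIoctl(fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
 *
 * On success, submit.seqno can then be passed to DRM_IOCTL_VC4_WAIT_SEQNO
 * to wait for the frame to complete.
 */
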
1105 | /** |
1106 | * vc4_submit_cl_ioctl() - Submits a job (frame) to the VC4. |
1107 | * @dev: DRM device |
1108 | * @data: ioctl argument |
1109 | * @file_priv: DRM file for this fd |
1110 | * |
1111 | * This is the main entrypoint for userspace to submit a 3D frame to |
1112 | * the GPU. Userspace provides the binner command list (if |
1113 | * applicable), and the kernel sets up the render command list to draw |
1114 | * to the framebuffer described in the ioctl, using the command lists |
1115 | * that the 3D engine's binner will produce. |
1116 | */ |
1117 | int |
1118 | vc4_submit_cl_ioctl(struct drm_device *dev, void *data, |
1119 | struct drm_file *file_priv) |
1120 | { |
1121 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
1122 | struct vc4_file *vc4file = file_priv->driver_priv; |
1123 | struct drm_vc4_submit_cl *args = data; |
1124 | struct drm_syncobj *out_sync = NULL; |
1125 | struct vc4_exec_info *exec; |
1126 | struct ww_acquire_ctx acquire_ctx; |
1127 | struct dma_fence *in_fence; |
1128 | int ret = 0; |
1129 | |
	trace_vc4_submit_cl_ioctl(dev, args->bin_cl_size,
				  args->shader_rec_size,
				  args->bo_handle_count);
1133 | |
1134 | if (WARN_ON_ONCE(vc4->is_vc5)) |
1135 | return -ENODEV; |
1136 | |
1137 | if (!vc4->v3d) { |
		DRM_DEBUG("VC4_SUBMIT_CL with no VC4 V3D probed\n");
1139 | return -ENODEV; |
1140 | } |
1141 | |
1142 | if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR | |
1143 | VC4_SUBMIT_CL_FIXED_RCL_ORDER | |
1144 | VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X | |
1145 | VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) { |
		DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
1147 | return -EINVAL; |
1148 | } |
1149 | |
1150 | if (args->pad2 != 0) { |
		DRM_DEBUG("Invalid pad: 0x%08x\n", args->pad2);
1152 | return -EINVAL; |
1153 | } |
1154 | |
	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
	if (!exec) {
		DRM_ERROR("malloc failure on exec struct\n");
1158 | return -ENOMEM; |
1159 | } |
1160 | exec->dev = vc4; |
1161 | |
1162 | ret = vc4_v3d_pm_get(vc4); |
1163 | if (ret) { |
		kfree(exec);
1165 | return ret; |
1166 | } |
1167 | |
1168 | exec->args = args; |
	INIT_LIST_HEAD(&exec->unref_list);
1170 | |
1171 | ret = vc4_cl_lookup_bos(dev, file_priv, exec); |
1172 | if (ret) |
1173 | goto fail; |
1174 | |
1175 | if (args->perfmonid) { |
		exec->perfmon = vc4_perfmon_find(vc4file,
						 args->perfmonid);
1178 | if (!exec->perfmon) { |
1179 | ret = -ENOENT; |
1180 | goto fail; |
1181 | } |
1182 | } |
1183 | |
1184 | if (args->in_sync) { |
		ret = drm_syncobj_find_fence(file_priv, args->in_sync,
					     0, 0, &in_fence);
1187 | if (ret) |
1188 | goto fail; |
1189 | |
1190 | /* When the fence (or fence array) is exclusively from our |
1191 | * context we can skip the wait since jobs are executed in |
1192 | * order of their submission through this ioctl and this can |
1193 | * only have fences from a prior job. |
1194 | */ |
		if (!dma_fence_match_context(in_fence,
					     vc4->dma_fence_context)) {
			ret = dma_fence_wait(in_fence, true);
			if (ret) {
				dma_fence_put(in_fence);
1200 | goto fail; |
1201 | } |
1202 | } |
1203 | |
		dma_fence_put(in_fence);
1205 | } |
1206 | |
1207 | if (exec->args->bin_cl_size != 0) { |
1208 | ret = vc4_get_bcl(dev, exec); |
1209 | if (ret) |
1210 | goto fail; |
1211 | } else { |
1212 | exec->ct0ca = 0; |
1213 | exec->ct0ea = 0; |
1214 | } |
1215 | |
1216 | ret = vc4_get_rcl(dev, exec); |
1217 | if (ret) |
1218 | goto fail; |
1219 | |
	ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx);
1221 | if (ret) |
1222 | goto fail; |
1223 | |
1224 | if (args->out_sync) { |
		out_sync = drm_syncobj_find(file_priv, args->out_sync);
1226 | if (!out_sync) { |
1227 | ret = -EINVAL; |
1228 | goto fail; |
1229 | } |
1230 | |
1231 | /* We replace the fence in out_sync in vc4_queue_submit since |
1232 | * the render job could execute immediately after that call. |
1233 | * If it finishes before our ioctl processing resumes the |
1234 | * render job fence could already have been freed. |
1235 | */ |
1236 | } |
1237 | |
1238 | /* Clear this out of the struct we'll be putting in the queue, |
1239 | * since it's part of our stack. |
1240 | */ |
1241 | exec->args = NULL; |
1242 | |
	ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync);
1244 | |
1245 | /* The syncobj isn't part of the exec data and we need to free our |
1246 | * reference even if job submission failed. |
1247 | */ |
1248 | if (out_sync) |
		drm_syncobj_put(out_sync);
1250 | |
1251 | if (ret) |
1252 | goto fail; |
1253 | |
1254 | /* Return the seqno for our job. */ |
1255 | args->seqno = vc4->emit_seqno; |
1256 | |
1257 | return 0; |
1258 | |
1259 | fail: |
	vc4_complete_exec(&vc4->base, exec);
1261 | |
1262 | return ret; |
1263 | } |
1264 | |
1265 | static void vc4_gem_destroy(struct drm_device *dev, void *unused); |
1266 | int vc4_gem_init(struct drm_device *dev) |
1267 | { |
1268 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
1269 | int ret; |
1270 | |
1271 | if (WARN_ON_ONCE(vc4->is_vc5)) |
1272 | return -ENODEV; |
1273 | |
	vc4->dma_fence_context = dma_fence_context_alloc(1);

	INIT_LIST_HEAD(&vc4->bin_job_list);
	INIT_LIST_HEAD(&vc4->render_job_list);
	INIT_LIST_HEAD(&vc4->job_done_list);
	INIT_LIST_HEAD(&vc4->seqno_cb_list);
1280 | spin_lock_init(&vc4->job_lock); |
1281 | |
1282 | INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work); |
1283 | timer_setup(&vc4->hangcheck.timer, vc4_hangcheck_elapsed, 0); |
1284 | |
1285 | INIT_WORK(&vc4->job_done_work, vc4_job_done_work); |
1286 | |
1287 | ret = drmm_mutex_init(dev, &vc4->power_lock); |
1288 | if (ret) |
1289 | return ret; |
1290 | |
	INIT_LIST_HEAD(&vc4->purgeable.list);
1292 | |
1293 | ret = drmm_mutex_init(dev, &vc4->purgeable.lock); |
1294 | if (ret) |
1295 | return ret; |
1296 | |
1297 | return drmm_add_action_or_reset(dev, vc4_gem_destroy, NULL); |
1298 | } |
1299 | |
1300 | static void vc4_gem_destroy(struct drm_device *dev, void *unused) |
1301 | { |
1302 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
1303 | |
1304 | /* Waiting for exec to finish would need to be done before |
1305 | * unregistering V3D. |
1306 | */ |
1307 | WARN_ON(vc4->emit_seqno != vc4->finished_seqno); |
1308 | |
1309 | /* V3D should already have disabled its interrupt and cleared |
1310 | * the overflow allocation registers. Now free the object. |
1311 | */ |
1312 | if (vc4->bin_bo) { |
		drm_gem_object_put(&vc4->bin_bo->base.base);
1314 | vc4->bin_bo = NULL; |
1315 | } |
1316 | |
1317 | if (vc4->hang_state) |
		vc4_free_hang_state(dev, vc4->hang_state);
1319 | } |
1320 | |
1321 | int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data, |
1322 | struct drm_file *file_priv) |
1323 | { |
1324 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
1325 | struct drm_vc4_gem_madvise *args = data; |
1326 | struct drm_gem_object *gem_obj; |
1327 | struct vc4_bo *bo; |
1328 | int ret; |
1329 | |
1330 | if (WARN_ON_ONCE(vc4->is_vc5)) |
1331 | return -ENODEV; |
1332 | |
1333 | switch (args->madv) { |
1334 | case VC4_MADV_DONTNEED: |
1335 | case VC4_MADV_WILLNEED: |
1336 | break; |
1337 | default: |
1338 | return -EINVAL; |
1339 | } |
1340 | |
1341 | if (args->pad != 0) |
1342 | return -EINVAL; |
1343 | |
	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
1347 | return -ENOENT; |
1348 | } |
1349 | |
1350 | bo = to_vc4_bo(gem_obj); |
1351 | |
1352 | /* Only BOs exposed to userspace can be purged. */ |
1353 | if (bo->madv == __VC4_MADV_NOTSUPP) { |
		DRM_DEBUG("madvise not supported on this BO\n");
1355 | ret = -EINVAL; |
1356 | goto out_put_gem; |
1357 | } |
1358 | |
1359 | /* Not sure it's safe to purge imported BOs. Let's just assume it's |
1360 | * not until proven otherwise. |
1361 | */ |
1362 | if (gem_obj->import_attach) { |
		DRM_DEBUG("madvise not supported on imported BOs\n");
1364 | ret = -EINVAL; |
1365 | goto out_put_gem; |
1366 | } |
1367 | |
1368 | mutex_lock(&bo->madv_lock); |
1369 | |
1370 | if (args->madv == VC4_MADV_DONTNEED && bo->madv == VC4_MADV_WILLNEED && |
	    !refcount_read(&bo->usecnt)) {
1372 | /* If the BO is about to be marked as purgeable, is not used |
1373 | * and is not already purgeable or purged, add it to the |
1374 | * purgeable list. |
1375 | */ |
1376 | vc4_bo_add_to_purgeable_pool(bo); |
1377 | } else if (args->madv == VC4_MADV_WILLNEED && |
1378 | bo->madv == VC4_MADV_DONTNEED && |
		   !refcount_read(&bo->usecnt)) {
1380 | /* The BO has not been purged yet, just remove it from |
1381 | * the purgeable list. |
1382 | */ |
1383 | vc4_bo_remove_from_purgeable_pool(bo); |
1384 | } |
1385 | |
1386 | /* Save the purged state. */ |
1387 | args->retained = bo->madv != __VC4_MADV_PURGED; |
1388 | |
1389 | /* Update internal madv state only if the bo was not purged. */ |
1390 | if (bo->madv != __VC4_MADV_PURGED) |
1391 | bo->madv = args->madv; |
1392 | |
	mutex_unlock(&bo->madv_lock);
1394 | |
1395 | ret = 0; |
1396 | |
1397 | out_put_gem: |
	drm_gem_object_put(gem_obj);
1399 | |
1400 | return ret; |
1401 | } |
1402 | |