1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved. |
3 | */ |
4 | |
5 | #include <linux/kernel.h> |
6 | #include <linux/types.h> |
7 | #include <linux/cpumask.h> |
8 | #include <linux/firmware/qcom/qcom_scm.h> |
9 | #include <linux/pm_opp.h> |
10 | #include <linux/nvmem-consumer.h> |
11 | #include <linux/slab.h> |
12 | #include "msm_gem.h" |
13 | #include "msm_mmu.h" |
14 | #include "a5xx_gpu.h" |
15 | |
16 | extern bool hang_debug; |
17 | static void a5xx_dump(struct msm_gpu *gpu); |
18 | |
19 | #define GPU_PAS_ID 13 |
20 | |
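/*
 * Note (added comment): the CP keeps its read pointer in a per-ring shadow
 * buffer that the kernel can read without touching GPU registers. Emitting a
 * CP_WHERE_AM_I packet asks the CP to write its current position for this
 * ring into that shadow (see shadowptr()); it is only available when the
 * microcode advertises support (a5xx_gpu->has_whereami).
 */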
21 | static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) |
22 | { |
23 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
24 | struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); |
25 | |
26 | if (a5xx_gpu->has_whereami) { |
27 | OUT_PKT7(ring, CP_WHERE_AM_I, 2); |
28 | OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring))); |
29 | OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring))); |
30 | } |
31 | } |
32 | |
33 | void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, |
34 | bool sync) |
35 | { |
36 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
37 | struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); |
38 | uint32_t wptr; |
39 | unsigned long flags; |
40 | |
41 | /* |
42 | * Most flush operations need to issue a WHERE_AM_I opcode to sync up |
43 | * the rptr shadow |
44 | */ |
45 | if (sync) |
46 | update_shadow_rptr(gpu, ring); |
47 | |
48 | spin_lock_irqsave(&ring->preempt_lock, flags); |
49 | |
50 | /* Copy the shadow to the actual register */ |
51 | ring->cur = ring->next; |
52 | |
53 | /* Make sure to wrap wptr if we need to */ |
54 | wptr = get_wptr(ring); |
55 | |
56 | spin_unlock_irqrestore(&ring->preempt_lock, flags); |
57 | |
58 | /* Make sure everything is posted before making a decision */ |
59 | mb(); |
60 | |
61 | /* Update HW if this is the current ring and we are not in preempt */ |
62 | if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu)) |
63 | gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr); |
64 | } |
65 | |
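/*
 * Note (added comment): "in ringbuffer" submit path used for privileged
 * (CONFIG_DRM_MSM_GPU_SUDO) submits. The command stream is copied directly
 * into the ringbuffer instead of being referenced as an indirect buffer, and
 * the submit is retired synchronously once the GPU is idle.
 */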
66 | static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit) |
67 | { |
68 | struct msm_ringbuffer *ring = submit->ring; |
69 | struct drm_gem_object *obj; |
70 | uint32_t *ptr, dwords; |
71 | unsigned int i; |
72 | |
73 | for (i = 0; i < submit->nr_cmds; i++) { |
74 | switch (submit->cmd[i].type) { |
75 | case MSM_SUBMIT_CMD_IB_TARGET_BUF: |
76 | break; |
77 | case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: |
78 | if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno) |
79 | break; |
80 | fallthrough; |
81 | case MSM_SUBMIT_CMD_BUF: |
82 | /* copy commands into RB: */ |
83 | obj = submit->bos[submit->cmd[i].idx].obj; |
84 | dwords = submit->cmd[i].size; |
85 | |
86 | ptr = msm_gem_get_vaddr(obj); |
87 | |
88 | /* _get_vaddr() shouldn't fail at this point, |
89 | * since we've already mapped it once in |
90 | * submit_reloc() |
91 | */ |
92 | if (WARN_ON(IS_ERR_OR_NULL(ptr))) |
93 | return; |
94 | |
95 | for (i = 0; i < dwords; i++) { |
96 | /* normally the OUT_PKTn() would wait |
97 | * for space for the packet. But since |
98 | * we just OUT_RING() the whole thing, |
99 | * need to call adreno_wait_ring() |
100 | * ourself: |
101 | */ |
102 | adreno_wait_ring(ring, 1); |
103 | OUT_RING(ring, ptr[i]); |
104 | } |
105 | |
106 | msm_gem_put_vaddr(obj); |
107 | |
108 | break; |
109 | } |
110 | } |
111 | |
112 | a5xx_flush(gpu, ring, true); |
113 | a5xx_preempt_trigger(gpu); |
114 | |
115 | /* we might not necessarily have a cmd from userspace to |
116 | * trigger an event to know that submit has completed, so |
117 | * do this manually: |
118 | */ |
119 | a5xx_idle(gpu, ring); |
120 | ring->memptrs->fence = submit->seqno; |
121 | msm_gpu_retire(gpu); |
122 | } |
123 | |
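/*
 * Note (added comment): normal submit path. Point the CP at this ring's
 * preemption save record, emit each command buffer as an indirect buffer,
 * write the fence value to memory via a CACHE_FLUSH_TS event (which also
 * raises an interrupt), and finally yield so a pending preemption can take
 * place at a safe point.
 */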
124 | static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) |
125 | { |
126 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
127 | struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); |
128 | struct msm_ringbuffer *ring = submit->ring; |
129 | unsigned int i, ibs = 0; |
130 | |
131 | if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) { |
132 | gpu->cur_ctx_seqno = 0; |
133 | a5xx_submit_in_rb(gpu, submit); |
134 | return; |
135 | } |
136 | |
137 | OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); |
138 | OUT_RING(ring, 0x02); |
139 | |
140 | /* Turn off protected mode to write to special registers */ |
141 | OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); |
142 | OUT_RING(ring, 0); |
143 | |
144 | /* Set the save preemption record for the ring/command */ |
145 | OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2); |
146 | OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id])); |
147 | OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id])); |
148 | |
149 | /* Turn back on protected mode */ |
150 | OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); |
151 | OUT_RING(ring, 1); |
152 | |
153 | /* Enable local preemption for finegrain preemption */ |
154 | OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1); |
155 | OUT_RING(ring, 0x1); |
156 | |
157 | /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */ |
158 | OUT_PKT7(ring, CP_YIELD_ENABLE, 1); |
159 | OUT_RING(ring, 0x02); |
160 | |
161 | /* Submit the commands */ |
162 | for (i = 0; i < submit->nr_cmds; i++) { |
163 | switch (submit->cmd[i].type) { |
164 | case MSM_SUBMIT_CMD_IB_TARGET_BUF: |
165 | break; |
166 | case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: |
167 | if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno) |
168 | break; |
169 | fallthrough; |
170 | case MSM_SUBMIT_CMD_BUF: |
171 | OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); |
172 | OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); |
173 | OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); |
174 | OUT_RING(ring, submit->cmd[i].size); |
175 | ibs++; |
176 | break; |
177 | } |
178 | |
179 | /* |
180 | * Periodically update the shadow rptr if needed, so that we |
181 | * can see partial progress of submits with large # of |
182 | * cmds.. otherwise we could needlessly stall waiting for |
183 | * ringbuffer state, simply due to looking at a shadow |
184 | * rptr value that has not been updated |
185 | */ |
186 | if ((ibs % 32) == 0) |
187 | update_shadow_rptr(gpu, ring); |
188 | } |
189 | |
190 | /* |
191 | * Write the render mode to NULL (0) to indicate to the CP that the IBs |
192 | * are done rendering - otherwise a lucky preemption would start |
193 | * replaying from the last checkpoint |
194 | */ |
195 | OUT_PKT7(ring, CP_SET_RENDER_MODE, 5); |
196 | OUT_RING(ring, 0); |
197 | OUT_RING(ring, 0); |
198 | OUT_RING(ring, 0); |
199 | OUT_RING(ring, 0); |
200 | OUT_RING(ring, 0); |
201 | |
202 | /* Turn off IB level preemptions */ |
203 | OUT_PKT7(ring, CP_YIELD_ENABLE, 1); |
204 | OUT_RING(ring, 0x01); |
205 | |
206 | /* Write the fence to the scratch register */ |
207 | OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1); |
208 | OUT_RING(ring, submit->seqno); |
209 | |
210 | /* |
211 | * Execute a CACHE_FLUSH_TS event. This will ensure that the |
212 | * timestamp is written to the memory and then triggers the interrupt |
213 | */ |
214 | OUT_PKT7(ring, CP_EVENT_WRITE, 4); |
215 | OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) | |
216 | CP_EVENT_WRITE_0_IRQ); |
217 | OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); |
218 | OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); |
219 | OUT_RING(ring, submit->seqno); |
220 | |
221 | /* Yield the floor on command completion */ |
222 | OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); |
223 | /* |
224 | * If dword[2:1] are non zero, they specify an address for the CP to |
225 | * write the value of dword[3] to on preemption complete. Write 0 to |
226 | * skip the write |
227 | */ |
228 | OUT_RING(ring, 0x00); |
229 | OUT_RING(ring, 0x00); |
230 | /* Data value - not used if the address above is 0 */ |
231 | OUT_RING(ring, 0x01); |
232 | /* Set bit 0 to trigger an interrupt on preempt complete */ |
233 | OUT_RING(ring, 0x01); |
234 | |
235 | /* A WHERE_AM_I packet is not needed after a YIELD */ |
236 | a5xx_flush(gpu, ring, false); |
237 | |
238 | /* Check to see if we need to start preemption */ |
239 | a5xx_preempt_trigger(gpu); |
240 | } |
241 | |
242 | static const struct adreno_five_hwcg_regs { |
243 | u32 offset; |
244 | u32 value; |
245 | } a5xx_hwcg[] = { |
246 | {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, |
247 | {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222}, |
248 | {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222}, |
249 | {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222}, |
250 | {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, |
251 | {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220}, |
252 | {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220}, |
253 | {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220}, |
254 | {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, |
255 | {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF}, |
256 | {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF}, |
257 | {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF}, |
258 | {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, |
259 | {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080}, |
260 | {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080}, |
261 | {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080}, |
262 | {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, |
263 | {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222}, |
264 | {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222}, |
265 | {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222}, |
266 | {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, |
267 | {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, |
268 | {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222}, |
269 | {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222}, |
270 | {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222}, |
271 | {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222}, |
272 | {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222}, |
273 | {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222}, |
274 | {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, |
275 | {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, |
276 | {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777}, |
277 | {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777}, |
278 | {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, |
279 | {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, |
280 | {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777}, |
281 | {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777}, |
282 | {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777}, |
283 | {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777}, |
284 | {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777}, |
285 | {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777}, |
286 | {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, |
287 | {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, |
288 | {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111}, |
289 | {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111}, |
290 | {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, |
291 | {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, |
292 | {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111}, |
293 | {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111}, |
294 | {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111}, |
295 | {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111}, |
296 | {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111}, |
297 | {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111}, |
298 | {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, |
299 | {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, |
300 | {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, |
301 | {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, |
302 | {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444}, |
303 | {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, |
304 | {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, |
305 | {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222}, |
306 | {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222}, |
307 | {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222}, |
308 | {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222}, |
309 | {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222}, |
310 | {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222}, |
311 | {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222}, |
312 | {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220}, |
313 | {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220}, |
314 | {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220}, |
315 | {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220}, |
316 | {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222}, |
317 | {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555}, |
318 | {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404}, |
319 | {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404}, |
320 | {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404}, |
321 | {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404}, |
322 | {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044}, |
323 | {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002}, |
324 | {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002}, |
325 | {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002}, |
326 | {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002}, |
327 | {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011}, |
328 | {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, |
329 | {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, |
330 | {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, |
331 | {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, |
332 | {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, |
333 | {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, |
334 | {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, |
335 | {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, |
336 | {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, |
337 | {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222} |
338 | }, a50x_hwcg[] = { |
339 | {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, |
340 | {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, |
341 | {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, |
342 | {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, |
343 | {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, |
344 | {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, |
345 | {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222}, |
346 | {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, |
347 | {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, |
348 | {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777}, |
349 | {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, |
350 | {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, |
351 | {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111}, |
352 | {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, |
353 | {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, |
354 | {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, |
355 | {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, |
356 | {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00FFFFF4}, |
357 | {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, |
358 | {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, |
359 | {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222}, |
360 | {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220}, |
361 | {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222}, |
362 | {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555}, |
363 | {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404}, |
364 | {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044}, |
365 | {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002}, |
366 | {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011}, |
367 | {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, |
368 | {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, |
369 | {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, |
370 | {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, |
371 | {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, |
372 | {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, |
373 | {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, |
374 | {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, |
375 | {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, |
376 | {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, |
377 | }, a512_hwcg[] = { |
378 | {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, |
379 | {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222}, |
380 | {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, |
381 | {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220}, |
382 | {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, |
383 | {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF}, |
384 | {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, |
385 | {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080}, |
386 | {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, |
387 | {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222}, |
388 | {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, |
389 | {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, |
390 | {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222}, |
391 | {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222}, |
392 | {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, |
393 | {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, |
394 | {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, |
395 | {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, |
396 | {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777}, |
397 | {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777}, |
398 | {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, |
399 | {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, |
400 | {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, |
401 | {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, |
402 | {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111}, |
403 | {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111}, |
404 | {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, |
405 | {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, |
406 | {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, |
407 | {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, |
408 | {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444}, |
409 | {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, |
410 | {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, |
411 | {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222}, |
412 | {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222}, |
413 | {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222}, |
414 | {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220}, |
415 | {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220}, |
416 | {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222}, |
417 | {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555}, |
418 | {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404}, |
419 | {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404}, |
420 | {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044}, |
421 | {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002}, |
422 | {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002}, |
423 | {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011}, |
424 | {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, |
425 | {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, |
426 | {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, |
427 | {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, |
428 | {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, |
429 | {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, |
430 | {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, |
431 | {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, |
432 | {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, |
433 | {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, |
434 | }; |
435 | |
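/*
 * Note (added comment): program the hardware clock gating (HWCG) tables
 * above. The table is chosen per GPU variant; writing the listed values
 * enables clock gating, writing zeroes disables it, and RBBM_CLOCK_CNTL is
 * toggled to match.
 */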
436 | void a5xx_set_hwcg(struct msm_gpu *gpu, bool state) |
437 | { |
438 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
439 | const struct adreno_five_hwcg_regs *regs; |
440 | unsigned int i, sz; |
441 | |
442 | if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu)) { |
443 | regs = a50x_hwcg; |
444 | sz = ARRAY_SIZE(a50x_hwcg); |
445 | } else if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu)) { |
446 | regs = a512_hwcg; |
447 | sz = ARRAY_SIZE(a512_hwcg); |
448 | } else { |
449 | regs = a5xx_hwcg; |
450 | sz = ARRAY_SIZE(a5xx_hwcg); |
451 | } |
452 | |
453 | for (i = 0; i < sz; i++) |
454 | gpu_write(gpu, regs[i].offset, |
455 | state ? regs[i].value : 0); |
456 | |
457 | if (adreno_is_a540(adreno_gpu)) { |
458 | gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0); |
459 | gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0); |
460 | } |
461 | |
462 | gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0); |
463 | gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180); |
464 | } |
465 | |
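/*
 * Note (added comment): initialize the CP microengine (ME) with a CP_ME_INIT
 * packet on ring 0. The eight dwords following the opcode select multiple
 * hardware contexts, error detection and any microcode workarounds needed
 * for the variant.
 */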
466 | static int a5xx_me_init(struct msm_gpu *gpu) |
467 | { |
468 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
469 | struct msm_ringbuffer *ring = gpu->rb[0]; |
470 | |
471 | OUT_PKT7(ring, CP_ME_INIT, 8); |
472 | |
473 | OUT_RING(ring, 0x0000002F); |
474 | |
475 | /* Enable multiple hardware contexts */ |
476 | OUT_RING(ring, 0x00000003); |
477 | |
478 | /* Enable error detection */ |
479 | OUT_RING(ring, 0x20000000); |
480 | |
481 | /* Don't enable header dump */ |
482 | OUT_RING(ring, 0x00000000); |
483 | OUT_RING(ring, 0x00000000); |
484 | |
485 | /* Specify workarounds for various microcode issues */ |
486 | if (adreno_is_a506(adreno_gpu) || adreno_is_a530(adreno_gpu)) { |
487 | /* Workaround for token end syncs |
488 | * Force a WFI after every direct-render 3D mode draw and every |
489 | * 2D mode 3 draw |
490 | */ |
491 | OUT_RING(ring, 0x0000000B); |
492 | } else if (adreno_is_a510(adreno_gpu)) { |
493 | /* Workaround for token and syncs */ |
494 | OUT_RING(ring, 0x00000001); |
495 | } else { |
496 | /* No workarounds enabled */ |
497 | OUT_RING(ring, 0x00000000); |
498 | } |
499 | |
500 | OUT_RING(ring, 0x00000000); |
501 | OUT_RING(ring, 0x00000000); |
502 | |
503 | a5xx_flush(gpu, ring, true); |
504 | return a5xx_idle(gpu, ring) ? 0 : -EINVAL; |
505 | } |
506 | |
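/*
 * Note (added comment): prime ring 0 for preemption. Point the CP at the
 * ring's preemption save record and issue an initial yield so later preempt
 * triggers start from a known state; skipped when only one ring is in use.
 */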
507 | static int a5xx_preempt_start(struct msm_gpu *gpu) |
508 | { |
509 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
510 | struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); |
511 | struct msm_ringbuffer *ring = gpu->rb[0]; |
512 | |
513 | if (gpu->nr_rings == 1) |
514 | return 0; |
515 | |
516 | /* Turn off protected mode to write to special registers */ |
517 | OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); |
518 | OUT_RING(ring, 0); |
519 | |
520 | /* Set the save preemption record for the ring/command */ |
521 | OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2); |
522 | OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id])); |
523 | OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id])); |
524 | |
525 | /* Turn back on protected mode */ |
526 | OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); |
527 | OUT_RING(ring, 1); |
528 | |
529 | OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); |
530 | OUT_RING(ring, 0x00); |
531 | |
532 | OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1); |
533 | OUT_RING(ring, 0x01); |
534 | |
535 | OUT_PKT7(ring, CP_YIELD_ENABLE, 1); |
536 | OUT_RING(ring, 0x01); |
537 | |
538 | /* Yield the floor on command completion */ |
539 | OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); |
540 | OUT_RING(ring, 0x00); |
541 | OUT_RING(ring, 0x00); |
542 | OUT_RING(ring, 0x01); |
543 | OUT_RING(ring, 0x01); |
544 | |
545 | /* The WHERE_AM_I packet is not needed after a YIELD is issued */ |
546 | a5xx_flush(gpu, ring, false); |
547 | |
548 | return a5xx_idle(gpu, ring) ? 0 : -EINVAL; |
549 | } |
550 | |
551 | static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu, |
552 | struct drm_gem_object *obj) |
553 | { |
554 | u32 *buf = msm_gem_get_vaddr(obj); |
555 | |
556 | if (IS_ERR(buf)) |
557 | return; |
558 | |
559 | /* |
560 | * If the lowest nibble is 0xa that is an indication that this microcode |
561 | * has been patched. The actual version is in dword [3] but we only care |
562 | * about the patchlevel which is the lowest nibble of dword [3] |
563 | */ |
564 | if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) |
565 | a5xx_gpu->has_whereami = true; |
566 | |
567 | msm_gem_put_vaddr(obj); |
568 | } |
569 | |
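/*
 * Note (added comment): create the PM4 and PFP microcode objects on first
 * use, detect whether the PFP microcode supports CP_WHERE_AM_I, and either
 * allocate the rptr shadow buffer or fall back to a single ring (no
 * preemption) when it does not.
 */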
570 | static int a5xx_ucode_load(struct msm_gpu *gpu) |
571 | { |
572 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
573 | struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); |
574 | int ret; |
575 | |
576 | if (!a5xx_gpu->pm4_bo) { |
577 | a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu, |
578 | adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova); |
579 | |
580 | |
581 | if (IS_ERR(a5xx_gpu->pm4_bo)) { |
582 | ret = PTR_ERR(a5xx_gpu->pm4_bo); |
583 | a5xx_gpu->pm4_bo = NULL; |
584 | DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n", |
585 | ret); |
586 | return ret; |
587 | } |
588 | |
589 | msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw"); |
590 | } |
591 | |
592 | if (!a5xx_gpu->pfp_bo) { |
593 | a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu, |
594 | adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova); |
595 | |
596 | if (IS_ERR(a5xx_gpu->pfp_bo)) { |
597 | ret = PTR_ERR(a5xx_gpu->pfp_bo); |
598 | a5xx_gpu->pfp_bo = NULL; |
599 | DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n", |
600 | ret); |
601 | return ret; |
602 | } |
603 | |
604 | msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw"); |
605 | a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo); |
606 | } |
607 | |
608 | if (a5xx_gpu->has_whereami) { |
609 | if (!a5xx_gpu->shadow_bo) { |
610 | a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev, |
611 | sizeof(u32) * gpu->nr_rings, |
612 | MSM_BO_WC | MSM_BO_MAP_PRIV, |
613 | gpu->aspace, &a5xx_gpu->shadow_bo, |
614 | &a5xx_gpu->shadow_iova); |
615 | |
616 | if (IS_ERR(a5xx_gpu->shadow)) |
617 | return PTR_ERR(a5xx_gpu->shadow); |
618 | |
619 | msm_gem_object_set_name(a5xx_gpu->shadow_bo, "shadow"); |
620 | } |
621 | } else if (gpu->nr_rings > 1) { |
622 | /* Disable preemption if WHERE_AM_I isn't available */ |
623 | a5xx_preempt_fini(gpu); |
624 | gpu->nr_rings = 1; |
625 | } |
626 | |
627 | return 0; |
628 | } |
629 | |
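/*
 * Note (added comment): the zap shader is firmware loaded into the secure
 * world through SCM; once it is in place the CP can be asked to drop out of
 * secure mode itself (see a5xx_hw_init() below).
 */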
630 | #define SCM_GPU_ZAP_SHADER_RESUME 0 |
631 | |
632 | static int a5xx_zap_shader_resume(struct msm_gpu *gpu) |
633 | { |
634 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
635 | int ret; |
636 | |
637 | /* |
638 | * The Adreno 506 has the CPZ retention feature and doesn't require |
639 | * the zap shader to be resumed |
640 | */ |
641 | if (adreno_is_a506(adreno_gpu)) |
642 | return 0; |
643 | |
644 | ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID); |
645 | if (ret) |
646 | DRM_ERROR("%s: zap-shader resume failed: %d\n", |
647 | gpu->name, ret); |
648 | |
649 | return ret; |
650 | } |
651 | |
652 | static int a5xx_zap_shader_init(struct msm_gpu *gpu) |
653 | { |
654 | static bool loaded; |
655 | int ret; |
656 | |
657 | /* |
658 | * If the zap shader is already loaded into memory we just need to kick |
659 | * the remote processor to reinitialize it |
660 | */ |
661 | if (loaded) |
662 | return a5xx_zap_shader_resume(gpu); |
663 | |
664 | ret = adreno_zap_shader_load(gpu, GPU_PAS_ID); |
665 | |
666 | loaded = !ret; |
667 | return ret; |
668 | } |
669 | |
670 | #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \ |
671 | A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \ |
672 | A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \ |
673 | A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \ |
674 | A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \ |
675 | A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \ |
676 | A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ |
677 | A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \ |
678 | A5XX_RBBM_INT_0_MASK_CP_SW | \ |
679 | A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ |
680 | A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ |
681 | A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP) |
682 | |
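/*
 * Note (added comment): one-time hardware bring-up. Configure VBIF/QoS,
 * performance counters, the GMEM aperture, CP protected ranges and clock
 * gating, load the microcode base addresses and ringbuffer address, start
 * the microengine, bring up the power/GPMU side and finally deal with the
 * zap shader before yielding the ring for preemption.
 */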
683 | static int a5xx_hw_init(struct msm_gpu *gpu) |
684 | { |
685 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
686 | struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); |
687 | u32 hbb; |
688 | int ret; |
689 | |
690 | gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); |
691 | |
692 | if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) || |
693 | adreno_is_a540(adreno_gpu)) |
694 | gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009); |
695 | |
696 | /* Make all blocks contribute to the GPU BUSY perf counter */ |
697 | gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF); |
698 | |
699 | /* Enable RBBM error reporting bits */ |
700 | gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001); |
701 | |
702 | if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) { |
703 | /* |
704 | * Mask out the activity signals from RB1-3 to avoid false |
705 | * positives |
706 | */ |
707 | |
708 | gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11, |
709 | 0xF0000000); |
710 | gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12, |
711 | 0xFFFFFFFF); |
712 | gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13, |
713 | 0xFFFFFFFF); |
714 | gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14, |
715 | 0xFFFFFFFF); |
716 | gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15, |
717 | 0xFFFFFFFF); |
718 | gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16, |
719 | 0xFFFFFFFF); |
720 | gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17, |
721 | 0xFFFFFFFF); |
722 | gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18, |
723 | 0xFFFFFFFF); |
724 | } |
725 | |
726 | /* Enable fault detection */ |
727 | gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL, |
728 | (1 << 30) | 0xFFFF); |
729 | |
730 | /* Turn on performance counters */ |
731 | gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01); |
732 | |
733 | /* Select CP0 to always count cycles */ |
734 | gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT); |
735 | |
736 | /* Select RBBM0 to countable 6 to get the busy status for devfreq */ |
737 | gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6); |
738 | |
739 | /* Increase VFD cache access so LRZ and other data gets evicted less */ |
740 | gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02); |
741 | |
742 | /* Disable L2 bypass in the UCHE */ |
743 | gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000); |
744 | gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF); |
745 | gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000); |
746 | gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF); |
747 | |
748 | /* Set the GMEM VA range (0 to gpu->gmem) */ |
749 | gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000); |
750 | gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000); |
751 | gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO, |
752 | 0x00100000 + adreno_gpu->info->gmem - 1); |
753 | gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000); |
754 | |
755 | if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu) || |
756 | adreno_is_a510(adreno_gpu)) { |
757 | gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20); |
758 | if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu)) |
759 | gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400); |
760 | else |
761 | gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20); |
762 | gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030); |
763 | gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A); |
764 | } else { |
765 | gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40); |
766 | if (adreno_is_a530(adreno_gpu)) |
767 | gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40); |
768 | else |
769 | gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400); |
770 | gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060); |
771 | gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16); |
772 | } |
773 | |
774 | if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu)) |
775 | gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, |
776 | (0x100 << 11 | 0x100 << 22)); |
777 | else if (adreno_is_a509(adreno_gpu) || adreno_is_a510(adreno_gpu) || |
778 | adreno_is_a512(adreno_gpu)) |
779 | gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, |
780 | (0x200 << 11 | 0x200 << 22)); |
781 | else |
782 | gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, |
783 | (0x400 << 11 | 0x300 << 22)); |
784 | |
785 | if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI) |
786 | gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8)); |
787 | |
788 | /* |
789 | * Disable the RB sampler datapath DP2 clock gating optimization |
790 | * for 1-SP GPUs, as it is enabled by default. |
791 | */ |
792 | if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu) || |
793 | adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu)) |
794 | gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, 0, (1 << 9)); |
795 | |
796 | /* Disable UCHE global filter as SP can invalidate/flush independently */ |
797 | gpu_write(gpu, REG_A5XX_UCHE_MODE_CNTL, BIT(29)); |
798 | |
799 | /* Enable USE_RETENTION_FLOPS */ |
800 | gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000); |
801 | |
802 | /* Enable ME/PFP split notification */ |
803 | gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF); |
804 | |
805 | /* |
806 | * On A5xx, the CCU can send the context_done event for a particular |
807 | * context to the UCHE, and ultimately the CP, while a valid |
808 | * transaction for that context is still pending inside the CCU. This |
809 | * can let the CP program config registers that change how the pending |
810 | * transaction is interpreted, which can cause a GPU fault. The bug is |
811 | * fixed in the latest A510 revision; to enable the fix, bit[11] of |
812 | * RB_DBG_ECO_CNTL needs to be set to 0 (the default is 1, i.e. |
813 | * disabled). On older A510 revisions this bit is unused. |
814 | */ |
815 | if (adreno_is_a510(adreno_gpu)) |
816 | gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0); |
817 | |
818 | /* Enable HWCG */ |
819 | a5xx_set_hwcg(gpu, true); |
820 | |
821 | gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F); |
822 | |
823 | BUG_ON(adreno_gpu->ubwc_config.highest_bank_bit < 13); |
824 | hbb = adreno_gpu->ubwc_config.highest_bank_bit - 13; |
825 | |
826 | gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, hbb << 7); |
827 | gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, hbb << 1); |
828 | |
829 | if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) || |
830 | adreno_is_a540(adreno_gpu)) |
831 | gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, hbb); |
832 | |
833 | /* Disable All flat shading optimization (ALLFLATOPTDIS) */ |
834 | gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, (1 << 10)); |
835 | |
836 | /* Protect registers from the CP */ |
837 | gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007); |
838 | |
839 | /* RBBM */ |
840 | gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4)); |
841 | gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8)); |
842 | gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16)); |
843 | gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32)); |
844 | gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64)); |
845 | gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64)); |
846 | |
847 | /* Content protect */ |
848 | gpu_write(gpu, REG_A5XX_CP_PROTECT(6), |
849 | ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, |
850 | 16)); |
851 | gpu_write(gpu, REG_A5XX_CP_PROTECT(7), |
852 | ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2)); |
853 | |
854 | /* CP */ |
855 | gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64)); |
856 | gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8)); |
857 | gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32)); |
858 | gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1)); |
859 | |
860 | /* RB */ |
861 | gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1)); |
862 | gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2)); |
863 | |
864 | /* VPC */ |
865 | gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8)); |
866 | gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 16)); |
867 | |
868 | /* UCHE */ |
869 | gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16)); |
870 | |
871 | /* SMMU */ |
872 | gpu_write(gpu, REG_A5XX_CP_PROTECT(17), |
873 | ADRENO_PROTECT_RW(0x10000, 0x8000)); |
874 | |
875 | gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0); |
876 | /* |
877 | * Disable the trusted memory range - we don't actually support secure |
878 | * memory rendering at this point in time and we don't want to block off |
879 | * part of the virtual memory space. |
880 | */ |
881 | gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000); |
882 | gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); |
883 | |
884 | /* Put the GPU into 64 bit by default */ |
885 | gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1); |
886 | gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1); |
887 | gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1); |
888 | gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1); |
889 | gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1); |
890 | gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1); |
891 | gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1); |
892 | gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1); |
893 | gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1); |
894 | gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1); |
895 | gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1); |
896 | gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1); |
897 | |
898 | /* |
899 | * A VPC corner case with local memory load kill leads to corrupt |
900 | * internal state. The normal disable does not work for all A5xx chips, |
901 | * so apply the following setting to disable it. |
902 | */ |
903 | if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) { |
904 | gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23)); |
905 | gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0); |
906 | } |
907 | |
908 | ret = adreno_hw_init(gpu); |
909 | if (ret) |
910 | return ret; |
911 | |
912 | if (adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu)) |
913 | a5xx_gpmu_ucode_init(gpu); |
914 | |
915 | gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO, a5xx_gpu->pm4_iova); |
916 | gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO, a5xx_gpu->pfp_iova); |
917 | |
918 | /* Set the ringbuffer address */ |
919 | gpu_write64(gpu, REG_A5XX_CP_RB_BASE, gpu->rb[0]->iova); |
920 | |
921 | /* |
922 | * If the microcode supports the WHERE_AM_I opcode then we can use that |
923 | * in lieu of the RPTR shadow and enable preemption. Otherwise, we |
924 | * can't safely use the RPTR shadow or preemption. In either case, the |
925 | * RPTR shadow should be disabled in hardware. |
926 | */ |
927 | gpu_write(gpu, REG_A5XX_CP_RB_CNTL, |
928 | MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); |
929 | |
930 | /* Configure the RPTR shadow if needed: */ |
931 | if (a5xx_gpu->shadow_bo) { |
932 | gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR, |
933 | shadowptr(a5xx_gpu, gpu->rb[0])); |
934 | } |
935 | |
936 | a5xx_preempt_hw_init(gpu); |
937 | |
938 | /* Disable the interrupts through the initial bringup stage */ |
939 | gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK); |
940 | |
941 | /* Clear ME_HALT to start the micro engine */ |
942 | gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0); |
943 | ret = a5xx_me_init(gpu); |
944 | if (ret) |
945 | return ret; |
946 | |
947 | ret = a5xx_power_init(gpu); |
948 | if (ret) |
949 | return ret; |
950 | |
951 | /* |
952 | * Send a pipeline event stat to get misbehaving counters to start |
953 | * ticking correctly |
954 | */ |
955 | if (adreno_is_a530(adreno_gpu)) { |
956 | OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1); |
957 | OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT)); |
958 | |
959 | a5xx_flush(gpu, gpu->rb[0], true); |
960 | if (!a5xx_idle(gpu, gpu->rb[0])) |
961 | return -EINVAL; |
962 | } |
963 | |
964 | /* |
965 | * If the chip we are using supports loading a zap shader, try to load |
966 | * one into the secure world. If successful we can use the CP to switch |
967 | * out of secure mode. If not, we have no recourse but to try to switch |
968 | * ourselves out manually. If we guessed wrong then access to the |
969 | * RBBM_SECVID_TRUST_CNTL register will be blocked and a permissions |
970 | * violation will soon follow. |
971 | */ |
972 | ret = a5xx_zap_shader_init(gpu); |
973 | if (!ret) { |
974 | OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1); |
975 | OUT_RING(gpu->rb[0], 0x00000000); |
976 | |
977 | a5xx_flush(gpu, gpu->rb[0], true); |
978 | if (!a5xx_idle(gpu, gpu->rb[0])) |
979 | return -EINVAL; |
980 | } else if (ret == -ENODEV) { |
981 | /* |
982 | * This device does not use a zap shader (but print a warning |
983 | * just in case someone got their dt wrong.. hopefully they |
984 | * have a debug UART to realize the error of their ways... |
985 | * if you mess this up you are about to crash horribly) |
986 | */ |
987 | dev_warn_once(gpu->dev->dev, |
988 | "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n" ); |
989 | gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0); |
990 | } else { |
991 | return ret; |
992 | } |
993 | |
994 | /* Last step - yield the ringbuffer */ |
995 | a5xx_preempt_start(gpu); |
996 | |
997 | return 0; |
998 | } |
999 | |
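/*
 * Note (added comment): recovery after a hang. Dump the CP scratch registers
 * (and optionally the full register set), pulse RBBM_SW_RESET_CMD to reset
 * the GPU core, then let the common adreno_recover() path restart the rings.
 */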
1000 | static void a5xx_recover(struct msm_gpu *gpu) |
1001 | { |
1002 | int i; |
1003 | |
1004 | adreno_dump_info(gpu); |
1005 | |
1006 | for (i = 0; i < 8; i++) { |
1007 | printk("CP_SCRATCH_REG%d: %u\n" , i, |
1008 | gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i))); |
1009 | } |
1010 | |
1011 | if (hang_debug) |
1012 | a5xx_dump(gpu); |
1013 | |
1014 | gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1); |
1015 | gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD); |
1016 | gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0); |
1017 | adreno_recover(gpu); |
1018 | } |
1019 | |
1020 | static void a5xx_destroy(struct msm_gpu *gpu) |
1021 | { |
1022 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
1023 | struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); |
1024 | |
1025 | DBG("%s" , gpu->name); |
1026 | |
1027 | a5xx_preempt_fini(gpu); |
1028 | |
1029 | if (a5xx_gpu->pm4_bo) { |
1030 | msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace); |
1031 | drm_gem_object_put(a5xx_gpu->pm4_bo); |
1032 | } |
1033 | |
1034 | if (a5xx_gpu->pfp_bo) { |
1035 | msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace); |
1036 | drm_gem_object_put(a5xx_gpu->pfp_bo); |
1037 | } |
1038 | |
1039 | if (a5xx_gpu->gpmu_bo) { |
1040 | msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace); |
1041 | drm_gem_object_put(a5xx_gpu->gpmu_bo); |
1042 | } |
1043 | |
1044 | if (a5xx_gpu->shadow_bo) { |
1045 | msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace); |
1046 | drm_gem_object_put(a5xx_gpu->shadow_bo); |
1047 | } |
1048 | |
1049 | adreno_gpu_cleanup(adreno_gpu); |
1050 | kfree(a5xx_gpu); |
1051 | } |
1052 | |
1053 | static inline bool _a5xx_check_idle(struct msm_gpu *gpu) |
1054 | { |
1055 | if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY) |
1056 | return false; |
1057 | |
1058 | /* |
1059 | * Nearly every abnormality ends up pausing the GPU and triggering a |
1060 | * fault so we can safely just watch for this one interrupt to fire |
1061 | */ |
1062 | return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) & |
1063 | A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT); |
1064 | } |
1065 | |
1066 | bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) |
1067 | { |
1068 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
1069 | struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); |
1070 | |
1071 | if (ring != a5xx_gpu->cur_ring) { |
1072 | WARN(1, "Tried to idle a non-current ringbuffer\n" ); |
1073 | return false; |
1074 | } |
1075 | |
1076 | /* wait for CP to drain ringbuffer: */ |
1077 | if (!adreno_idle(gpu, ring)) |
1078 | return false; |
1079 | |
1080 | if (spin_until(_a5xx_check_idle(gpu))) { |
1081 | DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n" , |
1082 | gpu->name, __builtin_return_address(0), |
1083 | gpu_read(gpu, REG_A5XX_RBBM_STATUS), |
1084 | gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS), |
1085 | gpu_read(gpu, REG_A5XX_CP_RB_RPTR), |
1086 | gpu_read(gpu, REG_A5XX_CP_RB_WPTR)); |
1087 | return false; |
1088 | } |
1089 | |
1090 | return true; |
1091 | } |
1092 | |
1093 | static int a5xx_fault_handler(void *arg, unsigned long iova, int flags, void *data) |
1094 | { |
1095 | struct msm_gpu *gpu = arg; |
1096 | struct adreno_smmu_fault_info *info = data; |
1097 | char block[12] = "unknown" ; |
1098 | u32 scratch[] = { |
1099 | gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)), |
1100 | gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)), |
1101 | gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)), |
1102 | gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)), |
1103 | }; |
1104 | |
1105 | if (info) |
1106 | snprintf(block, sizeof(block), "%x", info->fsynr1); |
1107 | |
1108 | return adreno_fault_handler(gpu, iova, flags, info, block, scratch); |
1109 | } |
1110 | |
1111 | static void a5xx_cp_err_irq(struct msm_gpu *gpu) |
1112 | { |
1113 | u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS); |
1114 | |
1115 | if (status & A5XX_CP_INT_CP_OPCODE_ERROR) { |
1116 | u32 val; |
1117 | |
1118 | gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0); |
1119 | |
1120 | /* |
1121 | * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so |
1122 | * read it twice |
1123 | */ |
1124 | |
1125 | gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA); |
1126 | val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA); |
1127 | |
1128 | dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n" , |
1129 | val); |
1130 | } |
1131 | |
1132 | if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR) |
1133 | dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n" , |
1134 | gpu_read(gpu, REG_A5XX_CP_HW_FAULT)); |
1135 | |
1136 | if (status & A5XX_CP_INT_CP_DMA_ERROR) |
1137 | dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n" ); |
1138 | |
1139 | if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) { |
1140 | u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS); |
1141 | |
1142 | dev_err_ratelimited(gpu->dev->dev, |
1143 | "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n" , |
1144 | val & (1 << 24) ? "WRITE" : "READ" , |
1145 | (val & 0xFFFFF) >> 2, val); |
1146 | } |
1147 | |
1148 | if (status & A5XX_CP_INT_CP_AHB_ERROR) { |
1149 | u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT); |
1150 | const char *access[16] = { "reserved", "reserved", |
1151 | "timestamp lo", "timestamp hi", "pfp read", "pfp write", |
1152 | "", "", "me read", "me write", "", "", "crashdump read", |
1153 | "crashdump write" }; |
1154 | |
1155 | dev_err_ratelimited(gpu->dev->dev, |
1156 | "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n" , |
1157 | status & 0xFFFFF, access[(status >> 24) & 0xF], |
1158 | (status & (1 << 31)), status); |
1159 | } |
1160 | } |
1161 | |
1162 | static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status) |
1163 | { |
1164 | if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) { |
1165 | u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS); |
1166 | |
1167 | dev_err_ratelimited(gpu->dev->dev, |
1168 | "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n" , |
1169 | val & (1 << 28) ? "WRITE" : "READ" , |
1170 | (val & 0xFFFFF) >> 2, (val >> 20) & 0x3, |
1171 | (val >> 24) & 0xF); |
1172 | |
1173 | /* Clear the error */ |
1174 | gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4)); |
1175 | |
1176 | /* Clear the interrupt */ |
1177 | gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD, |
1178 | A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR); |
1179 | } |
1180 | |
1181 | if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT) |
1182 | dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n" ); |
1183 | |
1184 | if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT) |
1185 | dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n" , |
1186 | gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS)); |
1187 | |
1188 | if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT) |
1189 | dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n" , |
1190 | gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS)); |
1191 | |
1192 | if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT) |
1193 | dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n" , |
1194 | gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS)); |
1195 | |
1196 | if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW) |
1197 | dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n" ); |
1198 | |
1199 | if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW) |
1200 | dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n" ); |
1201 | } |
1202 | |
1203 | static void a5xx_uche_err_irq(struct msm_gpu *gpu) |
1204 | { |
1205 | uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI); |
1206 | |
1207 | addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO); |
1208 | |
1209 | dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n" , |
1210 | addr); |
1211 | } |
1212 | |
1213 | static void a5xx_gpmu_err_irq(struct msm_gpu *gpu) |
1214 | { |
1215 | dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n" ); |
1216 | } |
1217 | |
1218 | static void a5xx_fault_detect_irq(struct msm_gpu *gpu) |
1219 | { |
1220 | struct drm_device *dev = gpu->dev; |
1221 | struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); |
1222 | |
1223 | /* |
1224 | * If stalled on SMMU fault, we could trip the GPU's hang detection, |
1225 | * but the fault handler will trigger the devcore dump, and we want |
1226 | * to otherwise resume normally rather than killing the submit, so |
1227 | * just bail. |
1228 | */ |
1229 | if (gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24)) |
1230 | return; |
1231 | |
1232 | DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n" , |
1233 | ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0, |
1234 | gpu_read(gpu, REG_A5XX_RBBM_STATUS), |
1235 | gpu_read(gpu, REG_A5XX_CP_RB_RPTR), |
1236 | gpu_read(gpu, REG_A5XX_CP_RB_WPTR), |
1237 | gpu_read64(gpu, REG_A5XX_CP_IB1_BASE), |
1238 | gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ), |
1239 | gpu_read64(gpu, REG_A5XX_CP_IB2_BASE), |
1240 | gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ)); |
1241 | |
1242 | /* Turn off the hangcheck timer to keep it from bothering us */ |
1243 | del_timer(&gpu->hangcheck_timer); |
1244 | |
1245 | kthread_queue_work(gpu->worker, &gpu->recover_work); |
1246 | } |
1247 | |
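/* Note (added comment): RBBM error sources routed to a5xx_rbbm_err_irq() */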
1248 | #define RBBM_ERROR_MASK \ |
1249 | (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \ |
1250 | A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \ |
1251 | A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \ |
1252 | A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \ |
1253 | A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \ |
1254 | A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW) |
1255 | |
1256 | static irqreturn_t a5xx_irq(struct msm_gpu *gpu) |
1257 | { |
1258 | struct msm_drm_private *priv = gpu->dev->dev_private; |
1259 | u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS); |
1260 | |
1261 | /* |
1262 | * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it |
1263 | * before the source is cleared the interrupt will storm. |
1264 | */ |
1265 | gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD, |
1266 | status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR); |
1267 | |
1268 | if (priv->disable_err_irq) { |
1269 | status &= A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | |
1270 | A5XX_RBBM_INT_0_MASK_CP_SW; |
1271 | } |
1272 | |
1273 | /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */ |
1274 | if (status & RBBM_ERROR_MASK) |
1275 | a5xx_rbbm_err_irq(gpu, status); |
1276 | |
1277 | if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR) |
1278 | a5xx_cp_err_irq(gpu); |
1279 | |
1280 | if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT) |
1281 | a5xx_fault_detect_irq(gpu); |
1282 | |
1283 | if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) |
1284 | a5xx_uche_err_irq(gpu); |
1285 | |
1286 | if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP) |
1287 | a5xx_gpmu_err_irq(gpu); |
1288 | |
1289 | if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) { |
1290 | a5xx_preempt_trigger(gpu); |
1291 | msm_gpu_retire(gpu); |
1292 | } |
1293 | |
1294 | if (status & A5XX_RBBM_INT_0_MASK_CP_SW) |
1295 | a5xx_preempt_irq(gpu); |
1296 | |
1297 | return IRQ_HANDLED; |
1298 | } |
1299 | |
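/*
 * Note (added comment): register ranges captured for dumps, expressed as
 * pairs of inclusive {start, end} offsets and terminated by ~0.
 */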
1300 | static const u32 a5xx_registers[] = { |
1301 | 0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B, |
1302 | 0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095, |
1303 | 0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3, |
1304 | 0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841, |
1305 | 0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28, |
1306 | 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53, |
1307 | 0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98, |
1308 | 0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585, |
1309 | 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8, |
1310 | 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E, |
1311 | 0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545, |
1312 | 0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0, |
1313 | 0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57, |
1314 | 0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8, |
1315 | 0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9, |
1316 | 0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201, |
1317 | 0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A, |
1318 | 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F, |
1319 | 0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0, |
1320 | 0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947, |
1321 | 0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7, |
1322 | 0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68, |
1323 | 0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB, |
1324 | 0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05, |
1325 | 0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3, |
1326 | 0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D, |
1327 | 0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5, |
1328 | 0xAC60, 0xAC60, ~0, |
1329 | }; |
1330 | |
1331 | static void a5xx_dump(struct msm_gpu *gpu) |
1332 | { |
1333 | DRM_DEV_INFO(gpu->dev->dev, "status: %08x\n", |
1334 | gpu_read(gpu, REG_A5XX_RBBM_STATUS)); |
1335 | adreno_dump(gpu); |
1336 | } |
1337 | |
1338 | static int a5xx_pm_resume(struct msm_gpu *gpu) |
1339 | { |
1340 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
1341 | int ret; |
1342 | |
1343 | /* Turn on the core power */ |
1344 | ret = msm_gpu_pm_resume(gpu); |
1345 | if (ret) |
1346 | return ret; |
1347 | |
1348 | /* Adreno 506, 508, 509, 510 and 512 need manual RBBM suspend/resume control */ |
1349 | if (!(adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu))) { |
1350 | /* Halt the sp_input_clk at HM level */ |
1351 | gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055); |
1352 | a5xx_set_hwcg(gpu, true); |
1353 | /* Turn on sp_input_clk at HM level */ |
1354 | gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0); |
1355 | return 0; |
1356 | } |
1357 | |
1358 | /* Turn on the RBCCU domain first to limit the chances of voltage droop */ |
1359 | gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000); |
1360 | |
1361 | /* Wait 3 usecs before polling */ |
1362 | udelay(3); |
1363 | |
1364 | ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS, |
1365 | (1 << 20), (1 << 20)); |
1366 | if (ret) { |
1367 | DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n", |
1368 | gpu->name, |
1369 | gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS)); |
1370 | return ret; |
1371 | } |
1372 | |
1373 | /* Turn on the SP domain */ |
1374 | gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000); |
1375 | ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS, |
1376 | (1 << 20), (1 << 20)); |
1377 | if (ret) |
1378 | DRM_ERROR("%s: timeout waiting for SP GDSC enable\n", |
1379 | gpu->name); |
1380 | |
1381 | return ret; |
1382 | } |
1383 | |
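/*
 * Note (added comment): power down. Halt and clear the VBIF XIN ports, reset
 * the VBIF on A510/A530 to avoid stale FIFO entries, then do the common
 * suspend; the rptr shadow is zeroed so stale values are not trusted on the
 * next resume.
 */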
1384 | static int a5xx_pm_suspend(struct msm_gpu *gpu) |
1385 | { |
1386 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
1387 | struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); |
1388 | u32 mask = 0xf; |
1389 | int i, ret; |
1390 | |
1391 | /* A506, A508, A510 have 3 XIN ports in VBIF */ |
1392 | if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu) || |
1393 | adreno_is_a510(adreno_gpu)) |
1394 | mask = 0x7; |
1395 | |
1396 | /* Clear the VBIF pipe before shutting down */ |
1397 | gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask); |
1398 | spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & |
1399 | mask) == mask); |
1400 | |
1401 | gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0); |
1402 | |
1403 | /* |
1404 | * Reset the VBIF before power collapse to avoid issue with FIFO |
1405 | * entries on Adreno A510 and A530 (the others will tend to lock up) |
1406 | */ |
1407 | if (adreno_is_a510(adreno_gpu) || adreno_is_a530(adreno_gpu)) { |
1408 | gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000); |
1409 | gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000); |
1410 | } |
1411 | |
1412 | ret = msm_gpu_pm_suspend(gpu); |
1413 | if (ret) |
1414 | return ret; |
1415 | |
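	/*
	 * Clear the rptr shadows so stale values aren't reported once the GPU
	 * comes back up with its ringbuffer read pointers reset.
	 */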
1416 | if (a5xx_gpu->has_whereami) |
1417 | for (i = 0; i < gpu->nr_rings; i++) |
1418 | a5xx_gpu->shadow[i] = 0; |
1419 | |
1420 | return 0; |
1421 | } |
1422 | |
1423 | static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) |
1424 | { |
1425 | *value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO); |
1426 | |
1427 | return 0; |
1428 | } |
1429 | |
1430 | struct a5xx_crashdumper { |
1431 | void *ptr; |
1432 | struct drm_gem_object *bo; |
1433 | u64 iova; |
1434 | }; |
1435 | |
1436 | struct a5xx_gpu_state { |
1437 | struct msm_gpu_state base; |
1438 | u32 *hlsqregs; |
1439 | }; |
1440 | |
1441 | static int a5xx_crashdumper_init(struct msm_gpu *gpu, |
1442 | struct a5xx_crashdumper *dumper) |
1443 | { |
1444 | dumper->ptr = msm_gem_kernel_new(gpu->dev, |
1445 | SZ_1M, MSM_BO_WC, gpu->aspace, |
1446 | &dumper->bo, &dumper->iova); |
1447 | |
	if (!IS_ERR(dumper->ptr))
		msm_gem_object_set_name(dumper->bo, "crashdump");
1450 | |
	return PTR_ERR_OR_ZERO(dumper->ptr);
1452 | } |
1453 | |
1454 | static int a5xx_crashdumper_run(struct msm_gpu *gpu, |
1455 | struct a5xx_crashdumper *dumper) |
1456 | { |
1457 | u32 val; |
1458 | |
	if (IS_ERR_OR_NULL(dumper->ptr))
1460 | return -EINVAL; |
1461 | |
1462 | gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova); |
1463 | |
1464 | gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1); |
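	/* Bit 2 of CRASH_DUMP_CNTL appears to signal that the script has completed */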
1465 | |
1466 | return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val, |
1467 | val & 0x04, 100, 10000); |
1468 | } |
1469 | |
1470 | /* |
1471 | * These are a list of the registers that need to be read through the HLSQ |
1472 | * aperture through the crashdumper. These are not nominally accessible from |
1473 | * the CPU on a secure platform. |
1474 | */ |
1475 | static const struct { |
1476 | u32 type; |
1477 | u32 regoffset; |
1478 | u32 count; |
1479 | } a5xx_hlsq_aperture_regs[] = { |
	{ 0x35, 0xe00, 0x32 }, /* HLSQ non-context */
1481 | { 0x31, 0x2080, 0x1 }, /* HLSQ 2D context 0 */ |
1482 | { 0x33, 0x2480, 0x1 }, /* HLSQ 2D context 1 */ |
1483 | { 0x32, 0xe780, 0x62 }, /* HLSQ 3D context 0 */ |
1484 | { 0x34, 0xef80, 0x62 }, /* HLSQ 3D context 1 */ |
1485 | { 0x3f, 0x0ec0, 0x40 }, /* SP non-context */ |
1486 | { 0x3d, 0x2040, 0x1 }, /* SP 2D context 0 */ |
1487 | { 0x3b, 0x2440, 0x1 }, /* SP 2D context 1 */ |
1488 | { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */ |
1489 | { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */ |
1490 | { 0x3a, 0x0f00, 0x1c }, /* TP non-context */ |
1491 | { 0x38, 0x2000, 0xa }, /* TP 2D context 0 */ |
1492 | { 0x36, 0x2400, 0xa }, /* TP 2D context 1 */ |
1493 | { 0x39, 0xe700, 0x80 }, /* TP 3D context 0 */ |
1494 | { 0x37, 0xef00, 0x80 }, /* TP 3D context 1 */ |
1495 | }; |
1496 | |
1497 | static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu, |
1498 | struct a5xx_gpu_state *a5xx_state) |
1499 | { |
1500 | struct a5xx_crashdumper dumper = { 0 }; |
1501 | u32 offset, count = 0; |
1502 | u64 *ptr; |
1503 | int i; |
1504 | |
	if (a5xx_crashdumper_init(gpu, &dumper))
1506 | return; |
1507 | |
1508 | /* The script will be written at offset 0 */ |
1509 | ptr = dumper.ptr; |
1510 | |
1511 | /* Start writing the data at offset 256k */ |
1512 | offset = dumper.iova + (256 * SZ_1K); |
1513 | |
1514 | /* Count how many additional registers to get from the HLSQ aperture */ |
1515 | for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) |
1516 | count += a5xx_hlsq_aperture_regs[i].count; |
1517 | |
	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1519 | if (!a5xx_state->hlsqregs) |
1520 | return; |
1521 | |
1522 | /* Build the crashdump script */ |
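	/*
	 * The script is a list of u64 pairs: the first word holds either the
	 * value to write or the target address for a read, and the second word
	 * encodes the register offset in bits 44+, a write flag in bit 21 and
	 * the number of registers in the low bits. Each bank is selected by
	 * writing to HLSQ_DBG_READ_SEL and then block-read through the AHB
	 * read aperture into the data area of the buffer.
	 */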
1523 | for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) { |
1524 | u32 type = a5xx_hlsq_aperture_regs[i].type; |
1525 | u32 c = a5xx_hlsq_aperture_regs[i].count; |
1526 | |
1527 | /* Write the register to select the desired bank */ |
1528 | *ptr++ = ((u64) type << 8); |
1529 | *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) | |
1530 | (1 << 21) | 1; |
1531 | |
1532 | *ptr++ = offset; |
1533 | *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44) |
1534 | | c; |
1535 | |
1536 | offset += c * sizeof(u32); |
1537 | } |
1538 | |
1539 | /* Write two zeros to close off the script */ |
1540 | *ptr++ = 0; |
1541 | *ptr++ = 0; |
1542 | |
	if (a5xx_crashdumper_run(gpu, &dumper)) {
		kfree(a5xx_state->hlsqregs);
1545 | msm_gem_kernel_put(dumper.bo, gpu->aspace); |
1546 | return; |
1547 | } |
1548 | |
1549 | /* Copy the data from the crashdumper to the state */ |
1550 | memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K), |
1551 | count * sizeof(u32)); |
1552 | |
1553 | msm_gem_kernel_put(dumper.bo, gpu->aspace); |
1554 | } |
1555 | |
1556 | static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu) |
1557 | { |
	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1559 | GFP_KERNEL); |
1560 | bool stalled = !!(gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24)); |
1561 | |
1562 | if (!a5xx_state) |
		return ERR_PTR(-ENOMEM);
1564 | |
1565 | /* Temporarily disable hardware clock gating before reading the hw */ |
	a5xx_set_hwcg(gpu, false);
1567 | |
1568 | /* First get the generic state from the adreno core */ |
	adreno_gpu_state_get(gpu, &(a5xx_state->base));
1570 | |
1571 | a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS); |
1572 | |
1573 | /* |
1574 | * Get the HLSQ regs with the help of the crashdumper, but only if |
1575 | * we are not stalled in an iommu fault (in which case the crashdumper |
1576 | * would not have access to memory) |
1577 | */ |
1578 | if (!stalled) |
1579 | a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state); |
1580 | |
	a5xx_set_hwcg(gpu, true);
1582 | |
1583 | return &a5xx_state->base; |
1584 | } |
1585 | |
1586 | static void a5xx_gpu_state_destroy(struct kref *kref) |
1587 | { |
1588 | struct msm_gpu_state *state = container_of(kref, |
1589 | struct msm_gpu_state, ref); |
1590 | struct a5xx_gpu_state *a5xx_state = container_of(state, |
1591 | struct a5xx_gpu_state, base); |
1592 | |
	kfree(a5xx_state->hlsqregs);

	adreno_gpu_state_destroy(state);
	kfree(a5xx_state);
1597 | } |
1598 | |
1599 | static int a5xx_gpu_state_put(struct msm_gpu_state *state) |
1600 | { |
	if (IS_ERR_OR_NULL(state))
		return 1;

	return kref_put(&state->ref, a5xx_gpu_state_destroy);
1605 | } |
1606 | |
1607 | |
1608 | #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) |
1609 | static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, |
1610 | struct drm_printer *p) |
1611 | { |
1612 | int i, j; |
1613 | u32 pos = 0; |
1614 | struct a5xx_gpu_state *a5xx_state = container_of(state, |
1615 | struct a5xx_gpu_state, base); |
1616 | |
	if (IS_ERR_OR_NULL(state))
1618 | return; |
1619 | |
1620 | adreno_show(gpu, state, p); |
1621 | |
1622 | /* Dump the additional a5xx HLSQ registers */ |
1623 | if (!a5xx_state->hlsqregs) |
1624 | return; |
1625 | |
1626 | drm_printf(p, "registers-hlsq:\n" ); |
1627 | |
1628 | for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) { |
1629 | u32 o = a5xx_hlsq_aperture_regs[i].regoffset; |
1630 | u32 c = a5xx_hlsq_aperture_regs[i].count; |
1631 | |
1632 | for (j = 0; j < c; j++, pos++, o++) { |
1633 | /* |
1634 | * To keep the crashdump simple we pull the entire range |
1635 | * for each register type but not all of the registers |
1636 | * in the range are valid. Fortunately invalid registers |
1637 | * stick out like a sore thumb with a value of |
1638 | * 0xdeadbeef |
1639 | */ |
1640 | if (a5xx_state->hlsqregs[pos] == 0xdeadbeef) |
1641 | continue; |
1642 | |
1643 | drm_printf(p, " - { offset: 0x%04x, value: 0x%08x }\n" , |
1644 | o << 2, a5xx_state->hlsqregs[pos]); |
1645 | } |
1646 | } |
1647 | } |
1648 | #endif |
1649 | |
1650 | static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu) |
1651 | { |
1652 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
1653 | struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); |
1654 | |
1655 | return a5xx_gpu->cur_ring; |
1656 | } |
1657 | |
1658 | static u64 a5xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate) |
1659 | { |
1660 | u64 busy_cycles; |
1661 | |
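	/*
	 * The RBBM perfcounter read below accumulates GPU busy cycles; paired
	 * with the core clock rate it lets the caller estimate utilization.
	 */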
1662 | busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO); |
1663 | *out_sample_rate = clk_get_rate(gpu->core_clk); |
1664 | |
1665 | return busy_cycles; |
1666 | } |
1667 | |
1668 | static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) |
1669 | { |
1670 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
1671 | struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); |
1672 | |
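	/*
	 * Prefer the memory-backed rptr shadow kept up to date by
	 * CP_WHERE_AM_I packets; fall back to reading the RPTR register
	 * directly when the shadow isn't available.
	 */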
1673 | if (a5xx_gpu->has_whereami) |
1674 | return a5xx_gpu->shadow[ring->id]; |
1675 | |
1676 | return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR); |
1677 | } |
1678 | |
1679 | static const struct adreno_gpu_funcs funcs = { |
1680 | .base = { |
1681 | .get_param = adreno_get_param, |
1682 | .set_param = adreno_set_param, |
1683 | .hw_init = a5xx_hw_init, |
1684 | .ucode_load = a5xx_ucode_load, |
1685 | .pm_suspend = a5xx_pm_suspend, |
1686 | .pm_resume = a5xx_pm_resume, |
1687 | .recover = a5xx_recover, |
1688 | .submit = a5xx_submit, |
1689 | .active_ring = a5xx_active_ring, |
1690 | .irq = a5xx_irq, |
1691 | .destroy = a5xx_destroy, |
1692 | #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) |
1693 | .show = a5xx_show, |
1694 | #endif |
1695 | #if defined(CONFIG_DEBUG_FS) |
1696 | .debugfs_init = a5xx_debugfs_init, |
1697 | #endif |
1698 | .gpu_busy = a5xx_gpu_busy, |
1699 | .gpu_state_get = a5xx_gpu_state_get, |
1700 | .gpu_state_put = a5xx_gpu_state_put, |
1701 | .create_address_space = adreno_create_address_space, |
1702 | .get_rptr = a5xx_get_rptr, |
1703 | }, |
1704 | .get_timestamp = a5xx_get_timestamp, |
1705 | }; |
1706 | |
1707 | static void check_speed_bin(struct device *dev) |
1708 | { |
1709 | struct nvmem_cell *cell; |
1710 | u32 val; |
1711 | |
1712 | /* |
	 * If the OPP table specifies an opp-supported-hw property then we have
	 * to set something with dev_pm_opp_set_supported_hw() or the table
	 * doesn't get populated. Pick an arbitrary value that should select
	 * the default frequencies without conflicting with any actual bins.
1718 | */ |
1719 | val = 0x80; |
1720 | |
	cell = nvmem_cell_get(dev, "speed_bin");

	if (!IS_ERR(cell)) {
		void *buf = nvmem_cell_read(cell, NULL);

		if (!IS_ERR(buf)) {
1727 | u8 bin = *((u8 *) buf); |
1728 | |
1729 | val = (1 << bin); |
			kfree(buf);
1731 | } |
1732 | |
1733 | nvmem_cell_put(cell); |
1734 | } |
1735 | |
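	/*
	 * The value is matched bitwise against each OPP's opp-supported-hw
	 * entry; only OPPs whose mask contains this bit remain enabled.
	 */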
	devm_pm_opp_set_supported_hw(dev, &val, 1);
1737 | } |
1738 | |
1739 | struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) |
1740 | { |
1741 | struct msm_drm_private *priv = dev->dev_private; |
1742 | struct platform_device *pdev = priv->gpu_pdev; |
1743 | struct adreno_platform_config *config = pdev->dev.platform_data; |
1744 | struct a5xx_gpu *a5xx_gpu = NULL; |
1745 | struct adreno_gpu *adreno_gpu; |
1746 | struct msm_gpu *gpu; |
1747 | unsigned int nr_rings; |
1748 | int ret; |
1749 | |
1750 | if (!pdev) { |
		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
		return ERR_PTR(-ENXIO);
1753 | } |
1754 | |
	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
	if (!a5xx_gpu)
		return ERR_PTR(-ENOMEM);
1758 | |
1759 | adreno_gpu = &a5xx_gpu->base; |
1760 | gpu = &adreno_gpu->base; |
1761 | |
1762 | adreno_gpu->registers = a5xx_registers; |
1763 | |
1764 | a5xx_gpu->lm_leakage = 0x4E001A; |
1765 | |
	check_speed_bin(&pdev->dev);
1767 | |
1768 | nr_rings = 4; |
1769 | |
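	/* A510 seems to have trouble with preemption, so restrict it to one ring */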
1770 | if (config->info->revn == 510) |
1771 | nr_rings = 1; |
1772 | |
	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, nr_rings);
	if (ret) {
		a5xx_destroy(&(a5xx_gpu->base.base));
		return ERR_PTR(ret);
	}
1777 | } |
1778 | |
1779 | if (gpu->aspace) |
1780 | msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler); |
1781 | |
1782 | /* Set up the preemption specific bits and pieces for each ringbuffer */ |
1783 | a5xx_preempt_init(gpu); |
1784 | |
1785 | /* Set the highest bank bit */ |
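	/* (it feeds into the UBWC/tiling configuration applied at hw init) */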
	if (adreno_is_a540(adreno_gpu) || adreno_is_a530(adreno_gpu))
1787 | adreno_gpu->ubwc_config.highest_bank_bit = 15; |
1788 | else |
1789 | adreno_gpu->ubwc_config.highest_bank_bit = 14; |
1790 | |
1791 | return gpu; |
1792 | } |
1793 | |