// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */

#include <linux/ascii85.h>
#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.h"
#include "a6xx_gpu_state.h"
#include "a6xx_gmu.xml.h"

/* Ignore diagnostics about register tables that we aren't using yet. We don't
 * want to modify these headers too much from their original source.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"

#include "adreno_gen7_0_0_snapshot.h"
#include "adreno_gen7_2_0_snapshot.h"

#pragma GCC diagnostic pop

struct a6xx_gpu_state_obj {
	const void *handle;
	u32 *data;
};

struct a6xx_gpu_state {
	struct msm_gpu_state base;

	struct a6xx_gpu_state_obj *gmu_registers;
	int nr_gmu_registers;

	struct a6xx_gpu_state_obj *registers;
	int nr_registers;

	struct a6xx_gpu_state_obj *shaders;
	int nr_shaders;

	struct a6xx_gpu_state_obj *clusters;
	int nr_clusters;

	struct a6xx_gpu_state_obj *dbgahb_clusters;
	int nr_dbgahb_clusters;

	struct a6xx_gpu_state_obj *indexed_regs;
	int nr_indexed_regs;

	struct a6xx_gpu_state_obj *debugbus;
	int nr_debugbus;

	struct a6xx_gpu_state_obj *vbif_debugbus;

	struct a6xx_gpu_state_obj *cx_debugbus;
	int nr_cx_debugbus;

	struct msm_gpu_state_bo *gmu_log;
	struct msm_gpu_state_bo *gmu_hfi;
	struct msm_gpu_state_bo *gmu_debug;

	s32 hfi_queue_history[2][HFI_HISTORY_SZ];

	struct list_head objs;

	bool gpu_initialized;
};

static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
{
	in[0] = val;
	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);

	return 2;
}

static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
{
	in[0] = target;
	in[1] = (((u64) reg) << 44 | dwords);

	return 2;
}

static inline int CRASHDUMP_FINI(u64 *in)
{
	in[0] = 0;
	in[1] = 0;

	return 2;
}
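
/*
 * Illustrative sketch (not called anywhere in the driver): the helpers
 * above emit two-u64 crashdumper script records - a payload word (value
 * or target iova) followed by a control word with the register offset in
 * the upper bits and the dword count in the low bits. A minimal script
 * that snapshots 'dwords' registers starting at 'reg' into 'out' would
 * be assembled like this:
 */
static inline int __maybe_unused crashdump_script_example(u64 *in, u32 reg,
		u32 dwords, u64 out)
{
	int cnt = 0;

	/* One READ record: copy 'dwords' registers at 'reg' to 'out' */
	cnt += CRASHDUMP_READ(in + cnt, reg, dwords, out);

	/* Terminate the script so the crashdumper stops processing */
	cnt += CRASHDUMP_FINI(in + cnt);

	return cnt;
}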

struct a6xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};

struct a6xx_state_memobj {
	struct list_head node;
	unsigned long long data[];
};
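
/*
 * Every state_kcalloc()/state_kmemdup() allocation below is linked onto
 * a6xx_state->objs so that a6xx_gpu_state_destroy() can free them all in
 * one pass, without each capture function tracking its own allocations.
 */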
static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
{
	struct a6xx_state_memobj *obj =
		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);

	if (!obj)
		return NULL;

	list_add_tail(&obj->node, &a6xx_state->objs);
	return &obj->data;
}

static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
		size_t size)
{
	void *dst = state_kcalloc(a6xx_state, 1, size);

	if (dst)
		memcpy(dst, src, size);
	return dst;
}

/*
 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 * the rest for the data
 */
#define A6XX_CD_DATA_OFFSET 8192
#define A6XX_CD_DATA_SIZE (SZ_1M - 8192)
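
/*
 * Sanity check (added for documentation): the script area and the data
 * area together exactly fill the 1MB scratch buffer.
 */
static_assert(A6XX_CD_DATA_OFFSET + A6XX_CD_DATA_SIZE == SZ_1M);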

static int a6xx_crashdumper_init(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	dumper->ptr = msm_gem_kernel_new(gpu->dev,
		SZ_1M, MSM_BO_WC, gpu->aspace,
		&dumper->bo, &dumper->iova);

	if (!IS_ERR(dumper->ptr))
		msm_gem_object_set_name(dumper->bo, "crashdump");

	return PTR_ERR_OR_ZERO(dumper->ptr);
}

static int a6xx_crashdumper_run(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 val;
	int ret;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
		return -EINVAL;

	/* Make sure all pending memory writes are posted */
	wmb();

	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE, dumper->iova);

	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);

	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
		val & 0x02, 100, 10000);

	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);

	return ret;
}

/* read a value from the GX debug bus */
static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}

#define cxdbg_write(ptr, offset, val) \
	msm_writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
	msm_readl((ptr) + ((offset) << 2))

/* read a value from the CX debug bus */
static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);

	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}

/* Read a chunk of data from the VBIF debug bus */
static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
		u32 reg, int count, u32 *data)
{
	int i;

	gpu_write(gpu, ctrl0, reg);

	for (i = 0; i < count; i++) {
		gpu_write(gpu, ctrl1, i);
		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
	}

	return count;
}

#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

#define VBIF_DEBUGBUS_BLOCK_SIZE \
	((16 * AXI_ARB_BLOCKS) + \
	 (18 * XIN_AXI_BLOCKS) + \
	 (12 * XIN_CORE_BLOCKS))
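
/*
 * For reference (derived from the constants above): 16 * 2 + 18 * 5 +
 * 12 * 4 = 170 dwords captured per VBIF debugbus snapshot.
 */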

static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_gpu_state_obj *obj)
{
	u32 clk, *ptr;
	int i;

	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
		sizeof(u32));
	if (!obj->data)
		return;

	obj->handle = NULL;

	/* Get the current clock setting */
	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);

	/* Force on the bus so we can read it */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);

	/* We will read from BUS2 first, so disable BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);

	/* Enable the VBIF bus for reading */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);

	ptr = obj->data;

	for (i = 0; i < AXI_ARB_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << (i + 16), 16, ptr);

	for (i = 0; i < XIN_AXI_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << i, 18, ptr);

	/* Stop BUS2 so we can turn on BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);

	for (i = 0; i < XIN_CORE_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
			1 << i, 12, ptr);

	/* Restore the VBIF clock setting */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
}

static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_debugbus_block *block,
		struct a6xx_gpu_state_obj *obj)
{
	int i;
	u32 *ptr;

	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
	if (!obj->data)
		return;

	obj->handle = block;

	for (ptr = obj->data, i = 0; i < block->count; i++)
		ptr += debugbus_read(gpu, block->id, i, ptr);
}

static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_debugbus_block *block,
		struct a6xx_gpu_state_obj *obj)
{
	int i;
	u32 *ptr;

	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
	if (!obj->data)
		return;

	obj->handle = block;

	for (ptr = obj->data, i = 0; i < block->count; i++)
		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
}

static void a6xx_get_debugbus_blocks(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	int nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);

	if (adreno_is_a650_family(to_adreno_gpu(gpu)))
		nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks);

	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
			sizeof(*a6xx_state->debugbus));

	if (a6xx_state->debugbus) {
		int i;

		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
			a6xx_get_debugbus_block(gpu,
				a6xx_state,
				&a6xx_debugbus_blocks[i],
				&a6xx_state->debugbus[i]);

		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);

		/*
		 * GBIF has the same debugbus as the other GPU blocks, so fall
		 * back to the default path if the GPU uses GBIF. GBIF also
		 * uses exactly the same block ID as VBIF.
		 */
		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
			a6xx_get_debugbus_block(gpu, a6xx_state,
				&a6xx_gbif_debugbus_block,
				&a6xx_state->debugbus[i]);

			a6xx_state->nr_debugbus += 1;
		}

		if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
			for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++)
				a6xx_get_debugbus_block(gpu,
					a6xx_state,
					&a650_debugbus_blocks[i],
					&a6xx_state->debugbus[i]);
		}
	}
}

static void a7xx_get_debugbus_blocks(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int debugbus_blocks_count, total_debugbus_blocks;
	const u32 *debugbus_blocks;
	int i;

	if (adreno_is_a730(adreno_gpu)) {
		debugbus_blocks = gen7_0_0_debugbus_blocks;
		debugbus_blocks_count = ARRAY_SIZE(gen7_0_0_debugbus_blocks);
	} else {
		BUG_ON(!adreno_is_a740_family(adreno_gpu));
		debugbus_blocks = gen7_2_0_debugbus_blocks;
		debugbus_blocks_count = ARRAY_SIZE(gen7_2_0_debugbus_blocks);
	}

	total_debugbus_blocks = debugbus_blocks_count +
		ARRAY_SIZE(a7xx_gbif_debugbus_blocks);

	a6xx_state->debugbus = state_kcalloc(a6xx_state, total_debugbus_blocks,
			sizeof(*a6xx_state->debugbus));

	if (a6xx_state->debugbus) {
		for (i = 0; i < debugbus_blocks_count; i++) {
			a6xx_get_debugbus_block(gpu,
				a6xx_state, &a7xx_debugbus_blocks[debugbus_blocks[i]],
				&a6xx_state->debugbus[i]);
		}

		for (i = 0; i < ARRAY_SIZE(a7xx_gbif_debugbus_blocks); i++) {
			a6xx_get_debugbus_block(gpu,
				a6xx_state, &a7xx_gbif_debugbus_blocks[i],
				&a6xx_state->debugbus[i + debugbus_blocks_count]);
		}
	}
}

static void a6xx_get_debugbus(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct resource *res;
	void __iomem *cxdbg = NULL;

	/* Set up the GX debug bus */

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);

	/* Set up the CX debug bus - it lives elsewhere in the system so do a
	 * temporary ioremap for the registers
	 */
	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
			"cx_dbgc");

	if (res)
		cxdbg = ioremap(res->start, resource_size(res));

	if (cxdbg) {
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
			0x76543210);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
			0xFEDCBA98);

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
	}

	if (adreno_is_a7xx(adreno_gpu)) {
		a7xx_get_debugbus_blocks(gpu, a6xx_state);
	} else {
		a6xx_get_debugbus_blocks(gpu, a6xx_state);
	}

	/* Dump the VBIF debugbus on applicable targets */
	if (!a6xx_has_gbif(adreno_gpu)) {
		a6xx_state->vbif_debugbus =
			state_kcalloc(a6xx_state, 1,
				sizeof(*a6xx_state->vbif_debugbus));

		if (a6xx_state->vbif_debugbus)
			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
					a6xx_state->vbif_debugbus);
	}

	if (cxdbg) {
		unsigned nr_cx_debugbus_blocks;
		const struct a6xx_debugbus_block *cx_debugbus_blocks;

		if (adreno_is_a7xx(adreno_gpu)) {
			BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
			cx_debugbus_blocks = a7xx_cx_debugbus_blocks;
			nr_cx_debugbus_blocks = ARRAY_SIZE(a7xx_cx_debugbus_blocks);
		} else {
			cx_debugbus_blocks = a6xx_cx_debugbus_blocks;
			nr_cx_debugbus_blocks = ARRAY_SIZE(a6xx_cx_debugbus_blocks);
		}

		a6xx_state->cx_debugbus =
			state_kcalloc(a6xx_state,
				nr_cx_debugbus_blocks,
				sizeof(*a6xx_state->cx_debugbus));

		if (a6xx_state->cx_debugbus) {
			int i;

			for (i = 0; i < nr_cx_debugbus_blocks; i++)
				a6xx_get_cx_debugbus_block(cxdbg,
					a6xx_state,
					&cx_debugbus_blocks[i],
					&a6xx_state->cx_debugbus[i]);

			a6xx_state->nr_cx_debugbus =
				nr_cx_debugbus_blocks;
		}

		iounmap(cxdbg);
	}
}

#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
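
/*
 * Register tables in this file (and the snapshot headers) are stored as
 * { first, last } pairs; RANGE() gives the inclusive register count of
 * the pair starting at index 'a'.
 */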

/* Read a data cluster from behind the AHB aperture */
static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_dbgahb_cluster *dbgahb,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
		int j;

		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
			(dbgahb->statetype + i * 2) << 8);

		for (j = 0; j < dbgahb->count; j += 2) {
			int count = RANGE(dbgahb->registers, j);
			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
				dbgahb->registers[j] - (dbgahb->base >> 2);

			in += CRASHDUMP_READ(in, offset, count, out);

			out += count * sizeof(u32);

			if (i == 0)
				regcount += count;
		}
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = dbgahb;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a7xx_get_dbgahb_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct gen7_sptp_cluster_registers *dbgahb,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
		A7XX_SP_READ_SEL_LOCATION(dbgahb->location_id) |
		A7XX_SP_READ_SEL_PIPE(dbgahb->pipe_id) |
		A7XX_SP_READ_SEL_STATETYPE(dbgahb->statetype));

	for (i = 0; dbgahb->regs[i] != UINT_MAX; i += 2) {
		int count = RANGE(dbgahb->regs, i);
		u32 offset = REG_A7XX_SP_AHB_READ_APERTURE +
			dbgahb->regs[i] - dbgahb->regbase;

		in += CRASHDUMP_READ(in, offset, count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = dbgahb;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_dbgahb_clusters),
		sizeof(*a6xx_state->dbgahb_clusters));

	if (!a6xx_state->dbgahb_clusters)
		return;

	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);

	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
			&a6xx_dbgahb_clusters[i],
			&a6xx_state->dbgahb_clusters[i], dumper);
}

static void a7xx_get_dbgahb_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;
	const struct gen7_sptp_cluster_registers *dbgahb_clusters;
	unsigned dbgahb_clusters_size;

	if (adreno_is_a730(adreno_gpu)) {
		dbgahb_clusters = gen7_0_0_sptp_clusters;
		dbgahb_clusters_size = ARRAY_SIZE(gen7_0_0_sptp_clusters);
	} else {
		BUG_ON(!adreno_is_a740_family(adreno_gpu));
		dbgahb_clusters = gen7_2_0_sptp_clusters;
		dbgahb_clusters_size = ARRAY_SIZE(gen7_2_0_sptp_clusters);
	}

	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
		dbgahb_clusters_size,
		sizeof(*a6xx_state->dbgahb_clusters));

	if (!a6xx_state->dbgahb_clusters)
		return;

	a6xx_state->nr_dbgahb_clusters = dbgahb_clusters_size;

	for (i = 0; i < dbgahb_clusters_size; i++)
		a7xx_get_dbgahb_cluster(gpu, a6xx_state,
			&dbgahb_clusters[i],
			&a6xx_state->dbgahb_clusters[i], dumper);
}

/* Read a data cluster from the CP aperture with the crashdumper */
static void a6xx_get_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_cluster *cluster,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;
	u32 id = cluster->id;

	/* Skip registers that are not present on older generation */
	if (!adreno_is_a660_family(adreno_gpu) &&
			cluster->registers == a660_fe_cluster)
		return;

	if (adreno_is_a650_family(adreno_gpu) &&
			cluster->registers == a6xx_ps_cluster)
		id = CLUSTER_VPC_PS;

	/* Some clusters need a selector register to be programmed too */
	if (cluster->sel_reg)
		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);

	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
		int j;

		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
			(id << 8) | (i << 4) | i);

		for (j = 0; j < cluster->count; j += 2) {
			int count = RANGE(cluster->registers, j);

			in += CRASHDUMP_READ(in, cluster->registers[j],
				count, out);

			out += count * sizeof(u32);

			if (i == 0)
				regcount += count;
		}
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = cluster;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a7xx_get_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct gen7_cluster_registers *cluster,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	/* Some clusters need a selector register to be programmed too */
	if (cluster->sel)
		in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val);

	in += CRASHDUMP_WRITE(in, REG_A7XX_CP_APERTURE_CNTL_CD,
		A7XX_CP_APERTURE_CNTL_CD_PIPE(cluster->pipe_id) |
		A7XX_CP_APERTURE_CNTL_CD_CLUSTER(cluster->cluster_id) |
		A7XX_CP_APERTURE_CNTL_CD_CONTEXT(cluster->context_id));

	for (i = 0; cluster->regs[i] != UINT_MAX; i += 2) {
		int count = RANGE(cluster->regs, i);

		in += CRASHDUMP_READ(in, cluster->regs[i],
			count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = cluster;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a6xx_get_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->clusters = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));

	if (!a6xx_state->clusters)
		return;

	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);

	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
			&a6xx_state->clusters[i], dumper);
}

static void a7xx_get_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;
	const struct gen7_cluster_registers *clusters;
	unsigned clusters_size;

	if (adreno_is_a730(adreno_gpu)) {
		clusters = gen7_0_0_clusters;
		clusters_size = ARRAY_SIZE(gen7_0_0_clusters);
	} else {
		BUG_ON(!adreno_is_a740_family(adreno_gpu));
		clusters = gen7_2_0_clusters;
		clusters_size = ARRAY_SIZE(gen7_2_0_clusters);
	}

	a6xx_state->clusters = state_kcalloc(a6xx_state,
		clusters_size, sizeof(*a6xx_state->clusters));

	if (!a6xx_state->clusters)
		return;

	a6xx_state->nr_clusters = clusters_size;

	for (i = 0; i < clusters_size; i++)
		a7xx_get_cluster(gpu, a6xx_state, &clusters[i],
			&a6xx_state->clusters[i], dumper);
}

/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
static void a6xx_get_shader_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_shader_block *block,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
	int i;

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
			(block->type << 8) | i);

		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
			block->size, out);

		out += block->size * sizeof(u32);
	}

	CRASHDUMP_FINI(in);

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = block;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a7xx_get_shader_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct gen7_shader_block *block,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize = block->size * block->num_sps * block->num_usptps * sizeof(u32);
	int i, j;

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (adreno_is_a730(adreno_gpu)) {
		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 3);
	}

	for (i = 0; i < block->num_sps; i++) {
		for (j = 0; j < block->num_usptps; j++) {
			in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
				A7XX_SP_READ_SEL_LOCATION(block->location) |
				A7XX_SP_READ_SEL_PIPE(block->pipeid) |
				A7XX_SP_READ_SEL_STATETYPE(block->statetype) |
				A7XX_SP_READ_SEL_USPTP(j) |
				A7XX_SP_READ_SEL_SPTP(i));

			in += CRASHDUMP_READ(in, REG_A7XX_SP_AHB_READ_APERTURE,
				block->size, out);

			out += block->size * sizeof(u32);
		}
	}

	CRASHDUMP_FINI(in);

	if (a6xx_crashdumper_run(gpu, dumper))
		goto out;

	obj->handle = block;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);

out:
	if (adreno_is_a730(adreno_gpu)) {
		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 0);
	}
}

static void a6xx_get_shaders(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->shaders = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));

	if (!a6xx_state->shaders)
		return;

	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);

	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
			&a6xx_state->shaders[i], dumper);
}

static void a7xx_get_shaders(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct gen7_shader_block *shader_blocks;
	unsigned num_shader_blocks;
	int i;

	if (adreno_is_a730(adreno_gpu)) {
		shader_blocks = gen7_0_0_shader_blocks;
		num_shader_blocks = ARRAY_SIZE(gen7_0_0_shader_blocks);
	} else {
		BUG_ON(!adreno_is_a740_family(adreno_gpu));
		shader_blocks = gen7_2_0_shader_blocks;
		num_shader_blocks = ARRAY_SIZE(gen7_2_0_shader_blocks);
	}

	a6xx_state->shaders = state_kcalloc(a6xx_state,
		num_shader_blocks, sizeof(*a6xx_state->shaders));

	if (!a6xx_state->shaders)
		return;

	a6xx_state->nr_shaders = num_shader_blocks;

	for (i = 0; i < num_shader_blocks; i++)
		a7xx_get_shader_block(gpu, a6xx_state, &shader_blocks[i],
			&a6xx_state->shaders[i], dumper);
}

/* Read registers from behind the HLSQ aperture with the crashdumper */
static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
			regs->registers[i] - (regs->val0 >> 2);

		in += CRASHDUMP_READ(in, offset, count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}

/* Read a block of registers using the crashdumper */
static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	/* Skip unsupported registers on older generations */
	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
			(regs->registers == a660_registers))
		return;

	/* Some blocks might need to program a selector register first */
	if (regs->val0)
		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);

		in += CRASHDUMP_READ(in, regs->registers[i], count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}

static void a7xx_get_crashdumper_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct gen7_reg_list *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	/* Some blocks might need to program a selector register first */
	if (regs->sel)
		in += CRASHDUMP_WRITE(in, regs->sel->cd_reg, regs->sel->val);

	for (i = 0; regs->regs[i] != UINT_MAX; i += 2) {
		u32 count = RANGE(regs->regs, i);

		in += CRASHDUMP_READ(in, regs->regs[i], count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs->regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}

/* Read a block of registers via AHB */
static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj)
{
	int i, regcount = 0, index = 0;

	/* Skip unsupported registers on older generations */
	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
			(regs->registers == a660_registers))
		return;

	for (i = 0; i < regs->count; i += 2)
		regcount += RANGE(regs->registers, i);

	obj->handle = (const void *) regs;
	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
	if (!obj->data)
		return;

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		int j;

		for (j = 0; j < count; j++)
			obj->data[index++] = gpu_read(gpu,
				regs->registers[i] + j);
	}
}

static void a7xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const u32 *regs,
		struct a6xx_gpu_state_obj *obj)
{
	int i, regcount = 0, index = 0;

	for (i = 0; regs[i] != UINT_MAX; i += 2)
		regcount += RANGE(regs, i);

	obj->handle = (const void *) regs;
	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
	if (!obj->data)
		return;

	for (i = 0; regs[i] != UINT_MAX; i += 2) {
		u32 count = RANGE(regs, i);
		int j;

		for (j = 0; j < count; j++)
			obj->data[index++] = gpu_read(gpu, regs[i] + j);
	}
}

static void a7xx_get_ahb_gpu_reglist(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct gen7_reg_list *regs,
		struct a6xx_gpu_state_obj *obj)
{
	if (regs->sel)
		gpu_write(gpu, regs->sel->host_reg, regs->sel->val);

	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, regs->regs, obj);
}

/* Read a block of GMU registers */
static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		bool rscc)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int i, regcount = 0, index = 0;

	for (i = 0; i < regs->count; i += 2)
		regcount += RANGE(regs->registers, i);

	obj->handle = (const void *) regs;
	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
	if (!obj->data)
		return;

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		int j;

		for (j = 0; j < count; j++) {
			u32 offset = regs->registers[i] + j;
			u32 val;

			if (rscc)
				val = gmu_read_rscc(gmu, offset);
			else
				val = gmu_read(gmu, offset);

			obj->data[index++] = val;
		}
	}
}

static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
		3, sizeof(*a6xx_state->gmu_registers));

	if (!a6xx_state->gmu_registers)
		return;

	a6xx_state->nr_gmu_registers = 3;

	/* Get the CX GMU registers from AHB */
	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
		&a6xx_state->gmu_registers[0], false);
	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
		&a6xx_state->gmu_registers[1], true);

	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
		return;

	/* Set the fence to ALLOW mode so we can access the registers */
	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);

	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
		&a6xx_state->gmu_registers[2], false);
}

static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
{
	struct msm_gpu_state_bo *snapshot;

	if (!bo->size)
		return NULL;

	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
	if (!snapshot)
		return NULL;

	snapshot->iova = bo->iova;
	snapshot->size = bo->size;
	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
	if (!snapshot->data)
		return NULL;

	memcpy(snapshot->data, bo->virt, bo->size);

	return snapshot;
}

static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	unsigned i, j;

	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
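
	/*
	 * queue->history is a ring buffer with history_idx as the next write
	 * position, so copying from history_idx forward yields the entries
	 * oldest-first.
	 */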
	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
		struct a6xx_hfi_queue *queue = &gmu->queues[i];
		for (j = 0; j < HFI_HISTORY_SZ; j++) {
			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
		}
	}
}

#define A6XX_REGLIST_SIZE 1
#define A6XX_GBIF_REGLIST_SIZE 1
static void a6xx_get_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i, count = A6XX_REGLIST_SIZE +
		ARRAY_SIZE(a6xx_reglist) +
		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
	int index = 0;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	a6xx_state->registers = state_kcalloc(a6xx_state,
		count, sizeof(*a6xx_state->registers));

	if (!a6xx_state->registers)
		return;

	a6xx_state->nr_registers = count;

	a6xx_get_ahb_gpu_registers(gpu,
		a6xx_state, &a6xx_ahb_reglist,
		&a6xx_state->registers[index++]);

	if (a6xx_has_gbif(adreno_gpu))
		a6xx_get_ahb_gpu_registers(gpu,
			a6xx_state, &a6xx_gbif_reglist,
			&a6xx_state->registers[index++]);
	else
		a6xx_get_ahb_gpu_registers(gpu,
			a6xx_state, &a6xx_vbif_reglist,
			&a6xx_state->registers[index++]);

	if (!dumper) {
		/*
		 * We can't use the crashdumper when the SMMU is stalled,
		 * because the GPU has no memory access until we resume
		 * translation (but we don't want to do that until after
		 * we have captured as much useful GPU state as possible).
		 * So instead collect registers via the CPU:
		 */
		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
			a6xx_get_ahb_gpu_registers(gpu,
				a6xx_state, &a6xx_reglist[i],
				&a6xx_state->registers[index++]);
		return;
	}

	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
		a6xx_get_crashdumper_registers(gpu,
			a6xx_state, &a6xx_reglist[i],
			&a6xx_state->registers[index++],
			dumper);

	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
		a6xx_get_crashdumper_hlsq_registers(gpu,
			a6xx_state, &a6xx_hlsq_reglist[i],
			&a6xx_state->registers[index++],
			dumper);
}

#define A7XX_PRE_CRASHDUMPER_SIZE 1
#define A7XX_POST_CRASHDUMPER_SIZE 1
static void a7xx_get_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i, count;
	int index = 0;
	const u32 *pre_crashdumper_regs;
	const struct gen7_reg_list *reglist;

	if (adreno_is_a730(adreno_gpu)) {
		reglist = gen7_0_0_reg_list;
		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
	} else {
		BUG_ON(!adreno_is_a740_family(adreno_gpu));
		reglist = gen7_2_0_reg_list;
		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
	}

	count = A7XX_PRE_CRASHDUMPER_SIZE + A7XX_POST_CRASHDUMPER_SIZE;

	/* The downstream reglist contains registers in other memory regions
	 * (cx_misc/cx_mem and cx_dbgc) and we need to plumb through their
	 * offsets and map them to read them on the CPU. For now only read the
	 * first region which is the main one.
	 */
	if (dumper) {
		for (i = 0; reglist[i].regs; i++)
			count++;
	} else {
		count++;
	}

	a6xx_state->registers = state_kcalloc(a6xx_state,
		count, sizeof(*a6xx_state->registers));

	if (!a6xx_state->registers)
		return;

	a6xx_state->nr_registers = count;

	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, pre_crashdumper_regs,
		&a6xx_state->registers[index++]);

	if (!dumper) {
		a7xx_get_ahb_gpu_reglist(gpu,
			a6xx_state, &reglist[0],
			&a6xx_state->registers[index++]);
		return;
	}

	for (i = 0; reglist[i].regs; i++)
		a7xx_get_crashdumper_registers(gpu,
			a6xx_state, &reglist[i],
			&a6xx_state->registers[index++],
			dumper);
}

static void a7xx_get_post_crashdumper_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const u32 *regs;

	BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
	regs = gen7_0_0_post_crashdumper_registers;

	a7xx_get_ahb_gpu_registers(gpu,
		a6xx_state, regs,
		&a6xx_state->registers[a6xx_state->nr_registers - 1]);
}

static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu)
{
	/*
	 * The value at [16:31] is in 4dword units. Convert it to dwords:
	 * (val >> 16) << 2 == val >> 14.
	 */
	return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14;
1403 | } |
1404 | |
1405 | static u32 a7xx_get_cp_roq_size(struct msm_gpu *gpu) |
1406 | { |
1407 | /* |
1408 | * The value at CP_ROQ_THRESHOLDS_2[20:31] is in 4dword units. |
1409 | * That register however is not directly accessible from APSS on A7xx. |
1410 | * Program the SQE_UCODE_DBG_ADDR with offset=0x70d3 and read the value. |
1411 | */ |
1412 | gpu_write(gpu, REG_A6XX_CP_SQE_UCODE_DBG_ADDR, 0x70d3); |
1413 | |
1414 | return 4 * (gpu_read(gpu, REG_A6XX_CP_SQE_UCODE_DBG_DATA) >> 20); |
1415 | } |
1416 | |
1417 | /* Read a block of data from an indexed register pair */ |
1418 | static void a6xx_get_indexed_regs(struct msm_gpu *gpu, |
1419 | struct a6xx_gpu_state *a6xx_state, |
1420 | struct a6xx_indexed_registers *indexed, |
1421 | struct a6xx_gpu_state_obj *obj) |
1422 | { |
1423 | int i; |
1424 | |
1425 | obj->handle = (const void *) indexed; |
1426 | if (indexed->count_fn) |
1427 | indexed->count = indexed->count_fn(gpu); |
1428 | |
1429 | obj->data = state_kcalloc(a6xx_state, nr: indexed->count, objsize: sizeof(u32)); |
1430 | if (!obj->data) |
1431 | return; |
1432 | |
1433 | /* All the indexed banks start at address 0 */ |
1434 | gpu_write(gpu, indexed->addr, 0); |
1435 | |
1436 | /* Read the data - each read increments the internal address by 1 */ |
1437 | for (i = 0; i < indexed->count; i++) |
1438 | obj->data[i] = gpu_read(gpu, indexed->data); |
1439 | } |
1440 | |
1441 | static void a6xx_get_indexed_registers(struct msm_gpu *gpu, |
1442 | struct a6xx_gpu_state *a6xx_state) |
1443 | { |
1444 | u32 mempool_size; |
1445 | int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1; |
1446 | int i; |
1447 | |
1448 | a6xx_state->indexed_regs = state_kcalloc(a6xx_state, nr: count, |
1449 | objsize: sizeof(*a6xx_state->indexed_regs)); |
1450 | if (!a6xx_state->indexed_regs) |
1451 | return; |
1452 | |
1453 | for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++) |
1454 | a6xx_get_indexed_regs(gpu, a6xx_state, indexed: &a6xx_indexed_reglist[i], |
1455 | obj: &a6xx_state->indexed_regs[i]); |
1456 | |
1457 | if (adreno_is_a650_family(to_adreno_gpu(gpu))) { |
1458 | u32 val; |
1459 | |
1460 | val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG); |
1461 | gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4); |
1462 | |
1463 | /* Get the contents of the CP mempool */ |
1464 | a6xx_get_indexed_regs(gpu, a6xx_state, indexed: &a6xx_cp_mempool_indexed, |
1465 | obj: &a6xx_state->indexed_regs[i]); |
1466 | |
1467 | gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val); |
1468 | a6xx_state->nr_indexed_regs = count; |
1469 | return; |
1470 | } |
1471 | |
1472 | /* Set the CP mempool size to 0 to stabilize it while dumping */ |
1473 | mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE); |
1474 | gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0); |
1475 | |
1476 | /* Get the contents of the CP mempool */ |
1477 | a6xx_get_indexed_regs(gpu, a6xx_state, indexed: &a6xx_cp_mempool_indexed, |
1478 | obj: &a6xx_state->indexed_regs[i]); |
1479 | |
1480 | /* |
1481 | * Offset 0x2000 in the mempool is the size - copy the saved size over |
1482 | * so the data is consistent |
1483 | */ |
1484 | a6xx_state->indexed_regs[i].data[0x2000] = mempool_size; |
1485 | |
1486 | /* Restore the size in the hardware */ |
1487 | gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size); |
1488 | } |
1489 | |
1490 | static void a7xx_get_indexed_registers(struct msm_gpu *gpu, |
1491 | struct a6xx_gpu_state *a6xx_state) |
1492 | { |
1493 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
1494 | int i, indexed_count, mempool_count; |
1495 | |
1496 | BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu))); |
1497 | indexed_count = ARRAY_SIZE(a7xx_indexed_reglist); |
1498 | mempool_count = ARRAY_SIZE(a7xx_cp_bv_mempool_indexed); |
1499 | |
1500 | a6xx_state->indexed_regs = state_kcalloc(a6xx_state, |
1501 | nr: indexed_count + mempool_count, |
1502 | objsize: sizeof(*a6xx_state->indexed_regs)); |
1503 | if (!a6xx_state->indexed_regs) |
1504 | return; |
1505 | |
1506 | a6xx_state->nr_indexed_regs = indexed_count + mempool_count; |
1507 | |
1508 | /* First read the common regs */ |
1509 | for (i = 0; i < indexed_count; i++) |
1510 | a6xx_get_indexed_regs(gpu, a6xx_state, indexed: &a7xx_indexed_reglist[i], |
1511 | obj: &a6xx_state->indexed_regs[i]); |
1512 | |
1513 | gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, 0, BIT(2)); |
1514 | gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, 0, BIT(2)); |
1515 | |
1516 | /* Get the contents of the CP_BV mempool */ |
1517 | for (i = 0; i < mempool_count; i++) |
1518 | a6xx_get_indexed_regs(gpu, a6xx_state, indexed: &a7xx_cp_bv_mempool_indexed[i], |
1519 | obj: &a6xx_state->indexed_regs[indexed_count + i]); |
1520 | |
1521 | gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(2), 0); |
1522 | gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, BIT(2), 0); |
1523 | return; |
1524 | } |
1525 | |
1526 | struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) |
1527 | { |
1528 | struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL; |
1529 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
1530 | struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); |
1531 | struct a6xx_gpu_state *a6xx_state = kzalloc(size: sizeof(*a6xx_state), |
1532 | GFP_KERNEL); |
1533 | bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & |
1534 | A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT); |
1535 | |
1536 | if (!a6xx_state) |
1537 | return ERR_PTR(error: -ENOMEM); |
1538 | |
1539 | INIT_LIST_HEAD(list: &a6xx_state->objs); |
1540 | |
1541 | /* Get the generic state from the adreno core */ |
1542 | adreno_gpu_state_get(gpu, state: &a6xx_state->base); |
1543 | |
1544 | if (!adreno_has_gmu_wrapper(gpu: adreno_gpu)) { |
1545 | a6xx_get_gmu_registers(gpu, a6xx_state); |
1546 | |
1547 | a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, bo: &a6xx_gpu->gmu.log); |
1548 | a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, bo: &a6xx_gpu->gmu.hfi); |
1549 | a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, bo: &a6xx_gpu->gmu.debug); |
1550 | |
1551 | a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state); |
1552 | } |
1553 | |
1554 | /* If GX isn't on the rest of the data isn't going to be accessible */ |
1555 | if (!adreno_has_gmu_wrapper(gpu: adreno_gpu) && !a6xx_gmu_gx_is_on(gmu: &a6xx_gpu->gmu)) |
1556 | return &a6xx_state->base; |
1557 | |
1558 | /* Get the banks of indexed registers */ |
1559 | if (adreno_is_a7xx(gpu: adreno_gpu)) |
1560 | a7xx_get_indexed_registers(gpu, a6xx_state); |
1561 | else |
1562 | a6xx_get_indexed_registers(gpu, a6xx_state); |
1563 | |
1564 | /* |
1565 | * Try to initialize the crashdumper, if we are not dumping state |
1566 | * with the SMMU stalled. The crashdumper needs memory access to |
1567 | * write out GPU state, so we need to skip this when the SMMU is |
1568 | * stalled in response to an iova fault |
1569 | */ |
1570 | if (!stalled && !gpu->needs_hw_init && |
1571 | !a6xx_crashdumper_init(gpu, dumper: &_dumper)) { |
1572 | dumper = &_dumper; |
1573 | } |
1574 | |
1575 | if (adreno_is_a7xx(gpu: adreno_gpu)) { |
1576 | a7xx_get_registers(gpu, a6xx_state, dumper); |
1577 | |
1578 | if (dumper) { |
1579 | a7xx_get_shaders(gpu, a6xx_state, dumper); |
1580 | a7xx_get_clusters(gpu, a6xx_state, dumper); |
1581 | a7xx_get_dbgahb_clusters(gpu, a6xx_state, dumper); |
1582 | |
1583 | msm_gem_kernel_put(dumper->bo, gpu->aspace); |
1584 | } |
1585 | |
1586 | a7xx_get_post_crashdumper_registers(gpu, a6xx_state); |
1587 | } else { |
1588 | a6xx_get_registers(gpu, a6xx_state, dumper); |
1589 | |
1590 | if (dumper) { |
1591 | a6xx_get_shaders(gpu, a6xx_state, dumper); |
1592 | a6xx_get_clusters(gpu, a6xx_state, dumper); |
1593 | a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper); |
1594 | |
1595 | msm_gem_kernel_put(dumper->bo, gpu->aspace); |
1596 | } |
1597 | } |
1598 | |
1599 | if (snapshot_debugbus) |
1600 | a6xx_get_debugbus(gpu, a6xx_state); |
1601 | |
1602 | a6xx_state->gpu_initialized = !gpu->needs_hw_init; |
1603 | |
1604 | return &a6xx_state->base; |
1605 | } |
1606 | |
1607 | static void a6xx_gpu_state_destroy(struct kref *kref) |
1608 | { |
1609 | struct a6xx_state_memobj *obj, *tmp; |
1610 | struct msm_gpu_state *state = container_of(kref, |
1611 | struct msm_gpu_state, ref); |
1612 | struct a6xx_gpu_state *a6xx_state = container_of(state, |
1613 | struct a6xx_gpu_state, base); |
1614 | |
1615 | if (a6xx_state->gmu_log) |
1616 | kvfree(addr: a6xx_state->gmu_log->data); |
1617 | |
1618 | if (a6xx_state->gmu_hfi) |
1619 | kvfree(addr: a6xx_state->gmu_hfi->data); |
1620 | |
1621 | if (a6xx_state->gmu_debug) |
1622 | kvfree(addr: a6xx_state->gmu_debug->data); |
1623 | |
1624 | list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) { |
1625 | list_del(entry: &obj->node); |
1626 | kvfree(addr: obj); |
1627 | } |
1628 | |
1629 | adreno_gpu_state_destroy(state); |
1630 | kfree(objp: a6xx_state); |
1631 | } |
1632 | |
1633 | int a6xx_gpu_state_put(struct msm_gpu_state *state) |
1634 | { |
	if (IS_ERR_OR_NULL(state))
		return 1;

	return kref_put(&state->ref, a6xx_gpu_state_destroy);
}

static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
		struct drm_printer *p)
{
	int i, index = 0;

	if (!data)
		return;

	for (i = 0; i < count; i += 2) {
		u32 count = RANGE(registers, i);
		u32 offset = registers[i];
		int j;

		for (j = 0; j < count; index++, offset++, j++) {
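			/* 0xdeafbead marks slots with no captured value; skip them */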
			if (data[index] == 0xdeafbead)
				continue;

			drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n",
				offset << 2, data[index]);
		}
	}
}

static void a7xx_show_registers_indented(const u32 *registers, u32 *data,
		struct drm_printer *p, unsigned indent)
{
	int i, index = 0;

	for (i = 0; registers[i] != UINT_MAX; i += 2) {
		u32 count = RANGE(registers, i);
		u32 offset = registers[i];
		int j;

		for (j = 0; j < count; index++, offset++, j++) {
			int k;

			if (data[index] == 0xdeafbead)
				continue;

			for (k = 0; k < indent; k++)
				drm_printf(p, " ");
			drm_printf(p, "- { offset: 0x%06x, value: 0x%08x }\n",
				offset << 2, data[index]);
		}
	}
}

static void a7xx_show_registers(const u32 *registers, u32 *data, struct drm_printer *p)
{
	a7xx_show_registers_indented(registers, data, p, 1);
}

static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
{
	char out[ASCII85_BUFSZ];
	long i, l, datalen = 0;

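	/* Find the last non-zero dword, so trailing zeros aren't encoded */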
	for (i = 0; i < len >> 2; i++) {
		if (data[i])
			datalen = (i + 1) << 2;
	}

	if (datalen == 0)
		return;

	drm_puts(p, " data: !!ascii85 |\n");
	drm_puts(p, " ");

	l = ascii85_encode_len(datalen);

	for (i = 0; i < l; i++)
		drm_puts(p, ascii85_encode(data[i], out));

	drm_puts(p, "\n");
}

static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
	drm_puts(p, fmt);
	drm_puts(p, name);
	drm_puts(p, "\n");
}

static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_shader_block *block = obj->handle;
	int i;

	if (!obj->handle)
		return;

	print_name(p, " - type: ", block->name);

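	/* block->size dwords are stored per shader bank, back to back */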
	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
		drm_printf(p, " - bank: %d\n", i);
		drm_printf(p, " size: %d\n", block->size);

		if (!obj->data)
			continue;

		print_ascii85(p, block->size << 2,
			obj->data + (block->size * i));
	}
}

static void a7xx_show_shader(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct gen7_shader_block *block = obj->handle;
	int i, j;
	u32 *data = obj->data;

	if (!obj->handle)
		return;

	print_name(p, " - type: ", a7xx_statetype_names[block->statetype]);
	print_name(p, " - pipe: ", a7xx_pipe_names[block->pipeid]);

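	/*
	 * The data is laid out as num_sps * num_usptps consecutive chunks of
	 * block->size dwords each
	 */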
	for (i = 0; i < block->num_sps; i++) {
		drm_printf(p, " - sp: %d\n", i);

		for (j = 0; j < block->num_usptps; j++) {
			drm_printf(p, " - usptp: %d\n", j);
			drm_printf(p, " size: %d\n", block->size);

			if (!obj->data)
				continue;

			print_ascii85(p, block->size << 2, data);

			data += block->size;
		}
	}
}

static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
		struct drm_printer *p)
{
	int ctx, index = 0;

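	/* Values for all contexts are stored consecutively; index runs across them */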
	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
		int j;

		drm_printf(p, " - context: %d\n", ctx);

		for (j = 0; j < size; j += 2) {
			u32 count = RANGE(registers, j);
			u32 offset = registers[j];
			int k;

			for (k = 0; k < count; index++, offset++, k++) {
				if (data[index] == 0xdeafbead)
					continue;

				drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n",
					offset << 2, data[index]);
			}
		}
	}
}

static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;

	if (dbgahb) {
		print_name(p, " - cluster-name: ", dbgahb->name);
		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
			obj->data, p);
	}
}

static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_cluster *cluster = obj->handle;

	if (cluster) {
		print_name(p, " - cluster-name: ", cluster->name);
		a6xx_show_cluster_data(cluster->registers, cluster->count,
			obj->data, p);
	}
}

static void a7xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct gen7_sptp_cluster_registers *dbgahb = obj->handle;

	if (dbgahb) {
		print_name(p, " - pipe: ", a7xx_pipe_names[dbgahb->pipe_id]);
		print_name(p, " - cluster-name: ", a7xx_cluster_names[dbgahb->cluster_id]);
		drm_printf(p, " - context: %d\n", dbgahb->context_id);
		a7xx_show_registers_indented(dbgahb->regs, obj->data, p, 4);
	}
}

static void a7xx_show_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct gen7_cluster_registers *cluster = obj->handle;

	if (cluster) {
		int context = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0;

		print_name(p, " - pipe: ", a7xx_pipe_names[cluster->pipe_id]);
		print_name(p, " - cluster-name: ", a7xx_cluster_names[cluster->cluster_id]);
		drm_printf(p, " - context: %d\n", context);
		a7xx_show_registers_indented(cluster->regs, obj->data, p, 4);
	}
}

static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_indexed_registers *indexed = obj->handle;

	if (!indexed)
		return;

	print_name(p, " - regs-name: ", indexed->name);
	drm_printf(p, " dwords: %d\n", indexed->count);

	print_ascii85(p, indexed->count << 2, obj->data);
}

static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
		u32 *data, struct drm_printer *p)
{
	if (block) {
		print_name(p, " - debugbus-block: ", block->name);

		/*
		 * count for regular debugbus data is in quadwords,
		 * but print the size in dwords for consistency
		 */
		drm_printf(p, " count: %d\n", block->count << 1);

		print_ascii85(p, block->count << 3, data);
	}
}

static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
		struct drm_printer *p)
{
	int i;

	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];

		a6xx_show_debugbus_block(obj->handle, obj->data, p);
	}

	if (a6xx_state->vbif_debugbus) {
		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;

		drm_puts(p, " - debugbus-block: A6XX_DBGBUS_VBIF\n");
		drm_printf(p, " count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);

		/* vbif debugbus data is in dwords. Confusing, huh? */
		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
	}

	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];

		a6xx_show_debugbus_block(obj->handle, obj->data, p);
	}
}

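/*
 * Top-level devcoredump printer. The output is YAML-ish; an illustrative
 * fragment (values made up) looks roughly like:
 *
 *   gpu-initialized: 1
 *   registers:
 *    - { offset: 0x000010, value: 0x00000001 }
 */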
void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu_state *a6xx_state = container_of(state,
			struct a6xx_gpu_state, base);
	int i;

	if (IS_ERR_OR_NULL(state))
		return;

	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);

	adreno_show(gpu, state, p);

	drm_puts(p, "gmu-log:\n");
	if (a6xx_state->gmu_log) {
		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;

		drm_printf(p, " iova: 0x%016llx\n", gmu_log->iova);
		drm_printf(p, " size: %zu\n", gmu_log->size);
		adreno_show_object(p, &gmu_log->data, gmu_log->size,
				&gmu_log->encoded);
	}

	drm_puts(p, "gmu-hfi:\n");
	if (a6xx_state->gmu_hfi) {
		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
		unsigned i, j;

		drm_printf(p, " iova: 0x%016llx\n", gmu_hfi->iova);
		drm_printf(p, " size: %zu\n", gmu_hfi->size);
		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
			drm_printf(p, " queue-history[%u]:", i);
			for (j = 0; j < HFI_HISTORY_SZ; j++) {
				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
			}
			drm_printf(p, "\n");
		}
		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
				&gmu_hfi->encoded);
	}

	drm_puts(p, "gmu-debug:\n");
	if (a6xx_state->gmu_debug) {
		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;

		drm_printf(p, " iova: 0x%016llx\n", gmu_debug->iova);
		drm_printf(p, " size: %zu\n", gmu_debug->size);
		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
				&gmu_debug->encoded);
	}

	drm_puts(p, "registers:\n");
	for (i = 0; i < a6xx_state->nr_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];

		if (!obj->handle)
			continue;

		if (adreno_is_a7xx(adreno_gpu)) {
			a7xx_show_registers(obj->handle, obj->data, p);
		} else {
			const struct a6xx_registers *regs = obj->handle;

			a6xx_show_registers(regs->registers, obj->data, regs->count, p);
		}
	}

	drm_puts(p, "registers-gmu:\n");
	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
		const struct a6xx_registers *regs = obj->handle;

		if (!obj->handle)
			continue;

		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
	}

	drm_puts(p, "indexed-registers:\n");
	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);

	drm_puts(p, "shader-blocks:\n");
	for (i = 0; i < a6xx_state->nr_shaders; i++) {
		if (adreno_is_a7xx(adreno_gpu))
			a7xx_show_shader(&a6xx_state->shaders[i], p);
		else
			a6xx_show_shader(&a6xx_state->shaders[i], p);
	}

	drm_puts(p, "clusters:\n");
	for (i = 0; i < a6xx_state->nr_clusters; i++) {
		if (adreno_is_a7xx(adreno_gpu))
			a7xx_show_cluster(&a6xx_state->clusters[i], p);
		else
			a6xx_show_cluster(&a6xx_state->clusters[i], p);
	}

	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) {
		if (adreno_is_a7xx(adreno_gpu))
			a7xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
		else
			a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
	}

	drm_puts(p, "debugbus:\n");
	a6xx_show_debugbus(a6xx_state, p);
}