// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
3
4#include <linux/ascii85.h>
5#include "msm_gem.h"
6#include "a6xx_gpu.h"
7#include "a6xx_gmu.h"
8#include "a6xx_gpu_state.h"
9#include "a6xx_gmu.xml.h"
10
11/* Ignore diagnostics about register tables that we aren't using yet. We don't
12 * want to modify these headers too much from their original source.
13 */
14#pragma GCC diagnostic push
15#pragma GCC diagnostic ignored "-Wunused-variable"
16
17#include "adreno_gen7_0_0_snapshot.h"
18#include "adreno_gen7_2_0_snapshot.h"
19
20#pragma GCC diagnostic pop
21
22struct a6xx_gpu_state_obj {
23 const void *handle;
24 u32 *data;
25};
26
27struct a6xx_gpu_state {
28 struct msm_gpu_state base;
29
30 struct a6xx_gpu_state_obj *gmu_registers;
31 int nr_gmu_registers;
32
33 struct a6xx_gpu_state_obj *registers;
34 int nr_registers;
35
36 struct a6xx_gpu_state_obj *shaders;
37 int nr_shaders;
38
39 struct a6xx_gpu_state_obj *clusters;
40 int nr_clusters;
41
42 struct a6xx_gpu_state_obj *dbgahb_clusters;
43 int nr_dbgahb_clusters;
44
45 struct a6xx_gpu_state_obj *indexed_regs;
46 int nr_indexed_regs;
47
48 struct a6xx_gpu_state_obj *debugbus;
49 int nr_debugbus;
50
51 struct a6xx_gpu_state_obj *vbif_debugbus;
52
53 struct a6xx_gpu_state_obj *cx_debugbus;
54 int nr_cx_debugbus;
55
56 struct msm_gpu_state_bo *gmu_log;
57 struct msm_gpu_state_bo *gmu_hfi;
58 struct msm_gpu_state_bo *gmu_debug;
59
60 s32 hfi_queue_history[2][HFI_HISTORY_SZ];
61
62 struct list_head objs;
63
64 bool gpu_initialized;
65};
66
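/*
 * Crashdumper scripts are built from pairs of 64-bit words: the first word
 * holds the value to write (CRASHDUMP_WRITE) or the target iova to copy into
 * (CRASHDUMP_READ), and the second packs the register offset into the upper
 * bits along with either a single-dword write marker or the number of dwords
 * to read. A pair of zeros (CRASHDUMP_FINI) terminates the script.
 */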
67static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
68{
69 in[0] = val;
70 in[1] = (((u64) reg) << 44 | (1 << 21) | 1);
71
72 return 2;
73}
74
75static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
76{
77 in[0] = target;
78 in[1] = (((u64) reg) << 44 | dwords);
79
80 return 2;
81}
82
83static inline int CRASHDUMP_FINI(u64 *in)
84{
85 in[0] = 0;
86 in[1] = 0;
87
88 return 2;
89}
90
91struct a6xx_crashdumper {
92 void *ptr;
93 struct drm_gem_object *bo;
94 u64 iova;
95};
96
97struct a6xx_state_memobj {
98 struct list_head node;
99 unsigned long long data[];
100};
101
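/*
 * Allocations made while capturing a snapshot are chained onto
 * a6xx_gpu_state::objs so that they can all be freed together when the
 * state is destroyed.
 */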
static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
{
        struct a6xx_state_memobj *obj =
                kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);

        if (!obj)
                return NULL;

        list_add_tail(&obj->node, &a6xx_state->objs);
        return &obj->data;
}

static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
                size_t size)
{
        void *dst = state_kcalloc(a6xx_state, 1, size);

        if (dst)
                memcpy(dst, src, size);
        return dst;
}
123
124/*
125 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
126 * the rest for the data
127 */
128#define A6XX_CD_DATA_OFFSET 8192
129#define A6XX_CD_DATA_SIZE (SZ_1M - 8192)
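
/*
 * A minimal sketch of how the helpers below use this region: the script is
 * assembled at the start of the buffer and its reads land in the data area,
 *
 *      u64 *in = dumper->ptr;
 *      u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 *
 *      in += CRASHDUMP_READ(in, reg, count, out);
 *      CRASHDUMP_FINI(in);
 *
 * and then a6xx_crashdumper_run() kicks off the dump.
 */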
130
static int a6xx_crashdumper_init(struct msm_gpu *gpu,
                struct a6xx_crashdumper *dumper)
{
        dumper->ptr = msm_gem_kernel_new(gpu->dev,
                SZ_1M, MSM_BO_WC, gpu->aspace,
                &dumper->bo, &dumper->iova);

        if (!IS_ERR(dumper->ptr))
                msm_gem_object_set_name(dumper->bo, "crashdump");

        return PTR_ERR_OR_ZERO(dumper->ptr);
}
143
144static int a6xx_crashdumper_run(struct msm_gpu *gpu,
145 struct a6xx_crashdumper *dumper)
146{
147 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
148 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
149 u32 val;
150 int ret;
151
        if (IS_ERR_OR_NULL(dumper->ptr))
                return -EINVAL;

        if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
                return -EINVAL;
157
158 /* Make sure all pending memory writes are posted */
159 wmb();
160
161 gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE, dumper->iova);
162
163 gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
164
165 ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
166 val & 0x02, 100, 10000);
167
168 gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
169
170 return ret;
171}
172
173/* read a value from the GX debug bus */
174static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
175 u32 *data)
176{
        u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
                A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
179
180 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
181 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
182 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
183 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
184
185 /* Wait 1 us to make sure the data is flowing */
186 udelay(1);
187
188 data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
189 data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
190
191 return 2;
192}
193
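/*
 * The CX debug bus lives in a separately ioremapped "cx_dbgc" region, so
 * these helpers convert the dword register offsets into byte offsets for the
 * raw MMIO accessors.
 */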
194#define cxdbg_write(ptr, offset, val) \
195 msm_writel((val), (ptr) + ((offset) << 2))
196
197#define cxdbg_read(ptr, offset) \
198 msm_readl((ptr) + ((offset) << 2))
199
200/* read a value from the CX debug bus */
201static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
202 u32 *data)
203{
        u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
                A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
206
207 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
208 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
209 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
210 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
211
212 /* Wait 1 us to make sure the data is flowing */
213 udelay(1);
214
215 data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
216 data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
217
218 return 2;
219}
220
221/* Read a chunk of data from the VBIF debug bus */
222static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
223 u32 reg, int count, u32 *data)
224{
225 int i;
226
227 gpu_write(gpu, ctrl0, reg);
228
229 for (i = 0; i < count; i++) {
230 gpu_write(gpu, ctrl1, i);
231 data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
232 }
233
234 return count;
235}
236
237#define AXI_ARB_BLOCKS 2
238#define XIN_AXI_BLOCKS 5
239#define XIN_CORE_BLOCKS 4
240
241#define VBIF_DEBUGBUS_BLOCK_SIZE \
242 ((16 * AXI_ARB_BLOCKS) + \
243 (18 * XIN_AXI_BLOCKS) + \
244 (12 * XIN_CORE_BLOCKS))
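/* 2 * 16 + 5 * 18 + 4 * 12 = 170 dwords in total */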
245
246static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
247 struct a6xx_gpu_state *a6xx_state,
248 struct a6xx_gpu_state_obj *obj)
249{
250 u32 clk, *ptr;
251 int i;
252
        obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
                        sizeof(u32));
255 if (!obj->data)
256 return;
257
258 obj->handle = NULL;
259
260 /* Get the current clock setting */
261 clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
262
263 /* Force on the bus so we can read it */
264 gpu_write(gpu, REG_A6XX_VBIF_CLKON,
265 clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
266
267 /* We will read from BUS2 first, so disable BUS1 */
268 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
269
270 /* Enable the VBIF bus for reading */
271 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
272
273 ptr = obj->data;
274
        for (i = 0; i < AXI_ARB_BLOCKS; i++)
                ptr += vbif_debugbus_read(gpu,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL0,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL1,
                        1 << (i + 16), 16, ptr);

        for (i = 0; i < XIN_AXI_BLOCKS; i++)
                ptr += vbif_debugbus_read(gpu,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL0,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL1,
                        1 << i, 18, ptr);

        /* Stop BUS2 so we can turn on BUS1 */
        gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);

        for (i = 0; i < XIN_CORE_BLOCKS; i++)
                ptr += vbif_debugbus_read(gpu,
                        REG_A6XX_VBIF_TEST_BUS1_CTRL0,
                        REG_A6XX_VBIF_TEST_BUS1_CTRL1,
                        1 << i, 12, ptr);
295
296 /* Restore the VBIF clock setting */
297 gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
298}
299
300static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
301 struct a6xx_gpu_state *a6xx_state,
302 const struct a6xx_debugbus_block *block,
303 struct a6xx_gpu_state_obj *obj)
304{
305 int i;
306 u32 *ptr;
307
        obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
309 if (!obj->data)
310 return;
311
312 obj->handle = block;
313
        for (ptr = obj->data, i = 0; i < block->count; i++)
                ptr += debugbus_read(gpu, block->id, i, ptr);
316}
317
318static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
319 struct a6xx_gpu_state *a6xx_state,
320 const struct a6xx_debugbus_block *block,
321 struct a6xx_gpu_state_obj *obj)
322{
323 int i;
324 u32 *ptr;
325
        obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
327 if (!obj->data)
328 return;
329
330 obj->handle = block;
331
        for (ptr = obj->data, i = 0; i < block->count; i++)
                ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
334}
335
336static void a6xx_get_debugbus_blocks(struct msm_gpu *gpu,
337 struct a6xx_gpu_state *a6xx_state)
338{
339 int nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
340 (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
341
342 if (adreno_is_a650_family(to_adreno_gpu(gpu)))
343 nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks);
344
        a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
                        sizeof(*a6xx_state->debugbus));
347
348 if (a6xx_state->debugbus) {
349 int i;
350
351 for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
352 a6xx_get_debugbus_block(gpu,
353 a6xx_state,
                                &a6xx_debugbus_blocks[i],
                                &a6xx_state->debugbus[i]);
356
357 a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
358
                /*
                 * GBIF has the same debugbus layout as the other GPU blocks,
                 * so fall back to the default path if the GPU uses GBIF. GBIF
                 * also uses exactly the same block ID as VBIF.
                 */
                if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
                        a6xx_get_debugbus_block(gpu, a6xx_state,
                                &a6xx_gbif_debugbus_block,
                                &a6xx_state->debugbus[i]);

                        a6xx_state->nr_debugbus += 1;
                }
371
372
373 if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
374 for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++)
375 a6xx_get_debugbus_block(gpu,
376 a6xx_state,
                                        &a650_debugbus_blocks[i],
                                        &a6xx_state->debugbus[i]);
379 }
380 }
381}
382
383static void a7xx_get_debugbus_blocks(struct msm_gpu *gpu,
384 struct a6xx_gpu_state *a6xx_state)
385{
386 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
387 int debugbus_blocks_count, total_debugbus_blocks;
388 const u32 *debugbus_blocks;
389 int i;
390
        if (adreno_is_a730(adreno_gpu)) {
392 debugbus_blocks = gen7_0_0_debugbus_blocks;
393 debugbus_blocks_count = ARRAY_SIZE(gen7_0_0_debugbus_blocks);
394 } else {
395 BUG_ON(!adreno_is_a740_family(adreno_gpu));
396 debugbus_blocks = gen7_2_0_debugbus_blocks;
397 debugbus_blocks_count = ARRAY_SIZE(gen7_2_0_debugbus_blocks);
398 }
399
400 total_debugbus_blocks = debugbus_blocks_count +
401 ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
402
        a6xx_state->debugbus = state_kcalloc(a6xx_state, total_debugbus_blocks,
                        sizeof(*a6xx_state->debugbus));
405
406 if (a6xx_state->debugbus) {
407 for (i = 0; i < debugbus_blocks_count; i++) {
408 a6xx_get_debugbus_block(gpu,
                                a6xx_state, &a7xx_debugbus_blocks[debugbus_blocks[i]],
                                &a6xx_state->debugbus[i]);
411 }
412
413 for (i = 0; i < ARRAY_SIZE(a7xx_gbif_debugbus_blocks); i++) {
414 a6xx_get_debugbus_block(gpu,
                                a6xx_state, &a7xx_gbif_debugbus_blocks[i],
                                &a6xx_state->debugbus[i + debugbus_blocks_count]);
417 }
418 }
419
420}
421
422static void a6xx_get_debugbus(struct msm_gpu *gpu,
423 struct a6xx_gpu_state *a6xx_state)
424{
425 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
426 struct resource *res;
427 void __iomem *cxdbg = NULL;
428
429 /* Set up the GX debug bus */
430
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
                A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
                A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
436
437 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
438 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
439 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
440 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
441
442 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
443 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
444
445 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
446 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
447 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
448 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
449
450 /* Set up the CX debug bus - it lives elsewhere in the system so do a
451 * temporary ioremap for the registers
452 */
453 res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
454 "cx_dbgc");
455
        if (res)
                cxdbg = ioremap(res->start, resource_size(res));
458
459 if (cxdbg) {
460 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
461 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
462
463 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
464 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
465
466 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
467 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
468 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
469 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
470
471 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
472 0x76543210);
473 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
474 0xFEDCBA98);
475
476 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
477 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
478 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
479 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
480 }
481
        if (adreno_is_a7xx(adreno_gpu)) {
483 a7xx_get_debugbus_blocks(gpu, a6xx_state);
484 } else {
485 a6xx_get_debugbus_blocks(gpu, a6xx_state);
486 }
487
488 /* Dump the VBIF debugbus on applicable targets */
        if (!a6xx_has_gbif(adreno_gpu)) {
                a6xx_state->vbif_debugbus =
                        state_kcalloc(a6xx_state, 1,
                                        sizeof(*a6xx_state->vbif_debugbus));

                if (a6xx_state->vbif_debugbus)
                        a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
                                        a6xx_state->vbif_debugbus);
497 }
498
499 if (cxdbg) {
500 unsigned nr_cx_debugbus_blocks;
501 const struct a6xx_debugbus_block *cx_debugbus_blocks;
502
                if (adreno_is_a7xx(adreno_gpu)) {
504 BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
505 cx_debugbus_blocks = a7xx_cx_debugbus_blocks;
506 nr_cx_debugbus_blocks = ARRAY_SIZE(a7xx_cx_debugbus_blocks);
507 } else {
508 cx_debugbus_blocks = a6xx_cx_debugbus_blocks;
509 nr_cx_debugbus_blocks = ARRAY_SIZE(a6xx_cx_debugbus_blocks);
510 }
511
512 a6xx_state->cx_debugbus =
                        state_kcalloc(a6xx_state,
                                        nr_cx_debugbus_blocks,
                                        sizeof(*a6xx_state->cx_debugbus));
516
517 if (a6xx_state->cx_debugbus) {
518 int i;
519
520 for (i = 0; i < nr_cx_debugbus_blocks; i++)
                                a6xx_get_cx_debugbus_block(cxdbg,
                                        a6xx_state,
                                        &cx_debugbus_blocks[i],
                                        &a6xx_state->cx_debugbus[i]);
525
526 a6xx_state->nr_cx_debugbus =
527 nr_cx_debugbus_blocks;
528 }
529
                iounmap(cxdbg);
531 }
532}
533
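/*
 * Register tables are lists of (first, last) dword offset pairs; RANGE()
 * returns the inclusive number of registers described by the pair starting
 * at index 'a'.
 */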
534#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
535
536/* Read a data cluster from behind the AHB aperture */
537static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
538 struct a6xx_gpu_state *a6xx_state,
539 const struct a6xx_dbgahb_cluster *dbgahb,
540 struct a6xx_gpu_state_obj *obj,
541 struct a6xx_crashdumper *dumper)
542{
543 u64 *in = dumper->ptr;
544 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
545 size_t datasize;
546 int i, regcount = 0;
547
548 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
549 int j;
550
                in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
                        (dbgahb->statetype + i * 2) << 8);
553
554 for (j = 0; j < dbgahb->count; j += 2) {
555 int count = RANGE(dbgahb->registers, j);
556 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
557 dbgahb->registers[j] - (dbgahb->base >> 2);
558
                        in += CRASHDUMP_READ(in, offset, count, out);
560
561 out += count * sizeof(u32);
562
563 if (i == 0)
564 regcount += count;
565 }
566 }
567
568 CRASHDUMP_FINI(in);
569
570 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
571
572 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
573 return;
574
575 if (a6xx_crashdumper_run(gpu, dumper))
576 return;
577
578 obj->handle = dbgahb;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
581}
582
583static void a7xx_get_dbgahb_cluster(struct msm_gpu *gpu,
584 struct a6xx_gpu_state *a6xx_state,
585 const struct gen7_sptp_cluster_registers *dbgahb,
586 struct a6xx_gpu_state_obj *obj,
587 struct a6xx_crashdumper *dumper)
588{
589 u64 *in = dumper->ptr;
590 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
591 size_t datasize;
592 int i, regcount = 0;
593
        in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
                A7XX_SP_READ_SEL_LOCATION(dbgahb->location_id) |
                A7XX_SP_READ_SEL_PIPE(dbgahb->pipe_id) |
                A7XX_SP_READ_SEL_STATETYPE(dbgahb->statetype));
598
599 for (i = 0; dbgahb->regs[i] != UINT_MAX; i += 2) {
600 int count = RANGE(dbgahb->regs, i);
601 u32 offset = REG_A7XX_SP_AHB_READ_APERTURE +
602 dbgahb->regs[i] - dbgahb->regbase;
603
                in += CRASHDUMP_READ(in, offset, count, out);
605
606 out += count * sizeof(u32);
607 regcount += count;
608 }
609
610 CRASHDUMP_FINI(in);
611
612 datasize = regcount * sizeof(u32);
613
614 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
615 return;
616
617 if (a6xx_crashdumper_run(gpu, dumper))
618 return;
619
620 obj->handle = dbgahb;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
623}
624
625static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
626 struct a6xx_gpu_state *a6xx_state,
627 struct a6xx_crashdumper *dumper)
628{
629 int i;
630
631 a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
632 ARRAY_SIZE(a6xx_dbgahb_clusters),
                sizeof(*a6xx_state->dbgahb_clusters));
634
635 if (!a6xx_state->dbgahb_clusters)
636 return;
637
638 a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
639
640 for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
641 a6xx_get_dbgahb_cluster(gpu, a6xx_state,
                        &a6xx_dbgahb_clusters[i],
                        &a6xx_state->dbgahb_clusters[i], dumper);
644}
645
646static void a7xx_get_dbgahb_clusters(struct msm_gpu *gpu,
647 struct a6xx_gpu_state *a6xx_state,
648 struct a6xx_crashdumper *dumper)
649{
650 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
651 int i;
652 const struct gen7_sptp_cluster_registers *dbgahb_clusters;
653 unsigned dbgahb_clusters_size;
654
        if (adreno_is_a730(adreno_gpu)) {
656 dbgahb_clusters = gen7_0_0_sptp_clusters;
657 dbgahb_clusters_size = ARRAY_SIZE(gen7_0_0_sptp_clusters);
658 } else {
659 BUG_ON(!adreno_is_a740_family(adreno_gpu));
660 dbgahb_clusters = gen7_2_0_sptp_clusters;
661 dbgahb_clusters_size = ARRAY_SIZE(gen7_2_0_sptp_clusters);
662 }
663
664 a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
                dbgahb_clusters_size,
                sizeof(*a6xx_state->dbgahb_clusters));
667
668 if (!a6xx_state->dbgahb_clusters)
669 return;
670
671 a6xx_state->nr_dbgahb_clusters = dbgahb_clusters_size;
672
673 for (i = 0; i < dbgahb_clusters_size; i++)
674 a7xx_get_dbgahb_cluster(gpu, a6xx_state,
                        &dbgahb_clusters[i],
                        &a6xx_state->dbgahb_clusters[i], dumper);
677}
678
679/* Read a data cluster from the CP aperture with the crashdumper */
680static void a6xx_get_cluster(struct msm_gpu *gpu,
681 struct a6xx_gpu_state *a6xx_state,
682 const struct a6xx_cluster *cluster,
683 struct a6xx_gpu_state_obj *obj,
684 struct a6xx_crashdumper *dumper)
685{
686 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
687 u64 *in = dumper->ptr;
688 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
689 size_t datasize;
690 int i, regcount = 0;
691 u32 id = cluster->id;
692
        /* Skip registers that are not present on older generations */
        if (!adreno_is_a660_family(adreno_gpu) &&
                        cluster->registers == a660_fe_cluster)
                return;

        if (adreno_is_a650_family(adreno_gpu) &&
                        cluster->registers == a6xx_ps_cluster)
                id = CLUSTER_VPC_PS;
701
702 /* Some clusters need a selector register to be programmed too */
703 if (cluster->sel_reg)
                in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
705
706 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
707 int j;
708
                in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
                        (id << 8) | (i << 4) | i);
711
712 for (j = 0; j < cluster->count; j += 2) {
713 int count = RANGE(cluster->registers, j);
714
                        in += CRASHDUMP_READ(in, cluster->registers[j],
                                count, out);
717
718 out += count * sizeof(u32);
719
720 if (i == 0)
721 regcount += count;
722 }
723 }
724
725 CRASHDUMP_FINI(in);
726
727 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
728
729 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
730 return;
731
732 if (a6xx_crashdumper_run(gpu, dumper))
733 return;
734
735 obj->handle = cluster;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
738}
739
740static void a7xx_get_cluster(struct msm_gpu *gpu,
741 struct a6xx_gpu_state *a6xx_state,
742 const struct gen7_cluster_registers *cluster,
743 struct a6xx_gpu_state_obj *obj,
744 struct a6xx_crashdumper *dumper)
745{
746 u64 *in = dumper->ptr;
747 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
748 size_t datasize;
749 int i, regcount = 0;
750
751 /* Some clusters need a selector register to be programmed too */
752 if (cluster->sel)
                in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val);
754
        in += CRASHDUMP_WRITE(in, REG_A7XX_CP_APERTURE_CNTL_CD,
                A7XX_CP_APERTURE_CNTL_CD_PIPE(cluster->pipe_id) |
                A7XX_CP_APERTURE_CNTL_CD_CLUSTER(cluster->cluster_id) |
                A7XX_CP_APERTURE_CNTL_CD_CONTEXT(cluster->context_id));
759
760 for (i = 0; cluster->regs[i] != UINT_MAX; i += 2) {
761 int count = RANGE(cluster->regs, i);
762
                in += CRASHDUMP_READ(in, cluster->regs[i],
                        count, out);
765
766 out += count * sizeof(u32);
767 regcount += count;
768 }
769
770 CRASHDUMP_FINI(in);
771
772 datasize = regcount * sizeof(u32);
773
774 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
775 return;
776
777 if (a6xx_crashdumper_run(gpu, dumper))
778 return;
779
780 obj->handle = cluster;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
783}
784
785static void a6xx_get_clusters(struct msm_gpu *gpu,
786 struct a6xx_gpu_state *a6xx_state,
787 struct a6xx_crashdumper *dumper)
788{
789 int i;
790
791 a6xx_state->clusters = state_kcalloc(a6xx_state,
                ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
793
794 if (!a6xx_state->clusters)
795 return;
796
797 a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
798
799 for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
                a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
                        &a6xx_state->clusters[i], dumper);
802}
803
804static void a7xx_get_clusters(struct msm_gpu *gpu,
805 struct a6xx_gpu_state *a6xx_state,
806 struct a6xx_crashdumper *dumper)
807{
808 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
809 int i;
810 const struct gen7_cluster_registers *clusters;
811 unsigned clusters_size;
812
        if (adreno_is_a730(adreno_gpu)) {
814 clusters = gen7_0_0_clusters;
815 clusters_size = ARRAY_SIZE(gen7_0_0_clusters);
816 } else {
817 BUG_ON(!adreno_is_a740_family(adreno_gpu));
818 clusters = gen7_2_0_clusters;
819 clusters_size = ARRAY_SIZE(gen7_2_0_clusters);
820 }
821
822 a6xx_state->clusters = state_kcalloc(a6xx_state,
                clusters_size, sizeof(*a6xx_state->clusters));
824
825 if (!a6xx_state->clusters)
826 return;
827
828 a6xx_state->nr_clusters = clusters_size;
829
830 for (i = 0; i < clusters_size; i++)
                a7xx_get_cluster(gpu, a6xx_state, &clusters[i],
                        &a6xx_state->clusters[i], dumper);
833}
834
835/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
836static void a6xx_get_shader_block(struct msm_gpu *gpu,
837 struct a6xx_gpu_state *a6xx_state,
838 const struct a6xx_shader_block *block,
839 struct a6xx_gpu_state_obj *obj,
840 struct a6xx_crashdumper *dumper)
841{
842 u64 *in = dumper->ptr;
843 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
844 size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
845 int i;
846
847 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
848 return;
849
850 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
                in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
                        (block->type << 8) | i);

                in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
                        block->size, out);
856
857 out += block->size * sizeof(u32);
858 }
859
860 CRASHDUMP_FINI(in);
861
862 if (a6xx_crashdumper_run(gpu, dumper))
863 return;
864
865 obj->handle = block;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
868}
869
870static void a7xx_get_shader_block(struct msm_gpu *gpu,
871 struct a6xx_gpu_state *a6xx_state,
872 const struct gen7_shader_block *block,
873 struct a6xx_gpu_state_obj *obj,
874 struct a6xx_crashdumper *dumper)
875{
876 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
877 u64 *in = dumper->ptr;
878 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
879 size_t datasize = block->size * block->num_sps * block->num_usptps * sizeof(u32);
880 int i, j;
881
882 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
883 return;
884
        if (adreno_is_a730(adreno_gpu)) {
886 gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 3);
887 }
888
889 for (i = 0; i < block->num_sps; i++) {
890 for (j = 0; j < block->num_usptps; j++) {
                        in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
                                A7XX_SP_READ_SEL_LOCATION(block->location) |
                                A7XX_SP_READ_SEL_PIPE(block->pipeid) |
                                A7XX_SP_READ_SEL_STATETYPE(block->statetype) |
                                A7XX_SP_READ_SEL_USPTP(j) |
                                A7XX_SP_READ_SEL_SPTP(i));

                        in += CRASHDUMP_READ(in, REG_A7XX_SP_AHB_READ_APERTURE,
                                block->size, out);
900
901 out += block->size * sizeof(u32);
902 }
903 }
904
905 CRASHDUMP_FINI(in);
906
907 if (a6xx_crashdumper_run(gpu, dumper))
908 goto out;
909
910 obj->handle = block;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
913
914out:
        if (adreno_is_a730(adreno_gpu)) {
916 gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 0);
917 }
918}
919
920static void a6xx_get_shaders(struct msm_gpu *gpu,
921 struct a6xx_gpu_state *a6xx_state,
922 struct a6xx_crashdumper *dumper)
923{
924 int i;
925
926 a6xx_state->shaders = state_kcalloc(a6xx_state,
                ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
928
929 if (!a6xx_state->shaders)
930 return;
931
932 a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
933
934 for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
                a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
                        &a6xx_state->shaders[i], dumper);
937}
938
939static void a7xx_get_shaders(struct msm_gpu *gpu,
940 struct a6xx_gpu_state *a6xx_state,
941 struct a6xx_crashdumper *dumper)
942{
943 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
944 const struct gen7_shader_block *shader_blocks;
945 unsigned num_shader_blocks;
946 int i;
947
        if (adreno_is_a730(adreno_gpu)) {
949 shader_blocks = gen7_0_0_shader_blocks;
950 num_shader_blocks = ARRAY_SIZE(gen7_0_0_shader_blocks);
951 } else {
952 BUG_ON(!adreno_is_a740_family(adreno_gpu));
953 shader_blocks = gen7_2_0_shader_blocks;
954 num_shader_blocks = ARRAY_SIZE(gen7_2_0_shader_blocks);
955 }
956
957 a6xx_state->shaders = state_kcalloc(a6xx_state,
                num_shader_blocks, sizeof(*a6xx_state->shaders));
959
960 if (!a6xx_state->shaders)
961 return;
962
963 a6xx_state->nr_shaders = num_shader_blocks;
964
965 for (i = 0; i < num_shader_blocks; i++)
                a7xx_get_shader_block(gpu, a6xx_state, &shader_blocks[i],
                        &a6xx_state->shaders[i], dumper);
968}
969
970/* Read registers from behind the HLSQ aperture with the crashdumper */
971static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
972 struct a6xx_gpu_state *a6xx_state,
973 const struct a6xx_registers *regs,
974 struct a6xx_gpu_state_obj *obj,
975 struct a6xx_crashdumper *dumper)
976
977{
978 u64 *in = dumper->ptr;
979 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
980 int i, regcount = 0;
981
        in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
983
984 for (i = 0; i < regs->count; i += 2) {
985 u32 count = RANGE(regs->registers, i);
986 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
987 regs->registers[i] - (regs->val0 >> 2);
988
                in += CRASHDUMP_READ(in, offset, count, out);
990
991 out += count * sizeof(u32);
992 regcount += count;
993 }
994
995 CRASHDUMP_FINI(in);
996
997 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
998 return;
999
1000 if (a6xx_crashdumper_run(gpu, dumper))
1001 return;
1002
1003 obj->handle = regs;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                regcount * sizeof(u32));
1006}
1007
1008/* Read a block of registers using the crashdumper */
1009static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
1010 struct a6xx_gpu_state *a6xx_state,
1011 const struct a6xx_registers *regs,
1012 struct a6xx_gpu_state_obj *obj,
1013 struct a6xx_crashdumper *dumper)
1014
1015{
1016 u64 *in = dumper->ptr;
1017 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1018 int i, regcount = 0;
1019
1020 /* Skip unsupported registers on older generations */
1021 if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
1022 (regs->registers == a660_registers))
1023 return;
1024
1025 /* Some blocks might need to program a selector register first */
1026 if (regs->val0)
                in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
1028
1029 for (i = 0; i < regs->count; i += 2) {
1030 u32 count = RANGE(regs->registers, i);
1031
                in += CRASHDUMP_READ(in, regs->registers[i], count, out);
1033
1034 out += count * sizeof(u32);
1035 regcount += count;
1036 }
1037
1038 CRASHDUMP_FINI(in);
1039
1040 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1041 return;
1042
1043 if (a6xx_crashdumper_run(gpu, dumper))
1044 return;
1045
1046 obj->handle = regs;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                regcount * sizeof(u32));
1049}
1050
1051static void a7xx_get_crashdumper_registers(struct msm_gpu *gpu,
1052 struct a6xx_gpu_state *a6xx_state,
1053 const struct gen7_reg_list *regs,
1054 struct a6xx_gpu_state_obj *obj,
1055 struct a6xx_crashdumper *dumper)
1056
1057{
1058 u64 *in = dumper->ptr;
1059 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1060 int i, regcount = 0;
1061
1062 /* Some blocks might need to program a selector register first */
1063 if (regs->sel)
                in += CRASHDUMP_WRITE(in, regs->sel->cd_reg, regs->sel->val);
1065
1066 for (i = 0; regs->regs[i] != UINT_MAX; i += 2) {
1067 u32 count = RANGE(regs->regs, i);
1068
                in += CRASHDUMP_READ(in, regs->regs[i], count, out);
1070
1071 out += count * sizeof(u32);
1072 regcount += count;
1073 }
1074
1075 CRASHDUMP_FINI(in);
1076
1077 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1078 return;
1079
1080 if (a6xx_crashdumper_run(gpu, dumper))
1081 return;
1082
1083 obj->handle = regs->regs;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                regcount * sizeof(u32));
1086}
1087
1088
1089/* Read a block of registers via AHB */
1090static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
1091 struct a6xx_gpu_state *a6xx_state,
1092 const struct a6xx_registers *regs,
1093 struct a6xx_gpu_state_obj *obj)
1094{
1095 int i, regcount = 0, index = 0;
1096
1097 /* Skip unsupported registers on older generations */
1098 if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
1099 (regs->registers == a660_registers))
1100 return;
1101
1102 for (i = 0; i < regs->count; i += 2)
1103 regcount += RANGE(regs->registers, i);
1104
1105 obj->handle = (const void *) regs;
        obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1107 if (!obj->data)
1108 return;
1109
1110 for (i = 0; i < regs->count; i += 2) {
1111 u32 count = RANGE(regs->registers, i);
1112 int j;
1113
1114 for (j = 0; j < count; j++)
1115 obj->data[index++] = gpu_read(gpu,
1116 regs->registers[i] + j);
1117 }
1118}
1119
1120static void a7xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
1121 struct a6xx_gpu_state *a6xx_state,
1122 const u32 *regs,
1123 struct a6xx_gpu_state_obj *obj)
1124{
1125 int i, regcount = 0, index = 0;
1126
1127 for (i = 0; regs[i] != UINT_MAX; i += 2)
1128 regcount += RANGE(regs, i);
1129
1130 obj->handle = (const void *) regs;
        obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1132 if (!obj->data)
1133 return;
1134
1135 for (i = 0; regs[i] != UINT_MAX; i += 2) {
1136 u32 count = RANGE(regs, i);
1137 int j;
1138
1139 for (j = 0; j < count; j++)
1140 obj->data[index++] = gpu_read(gpu, regs[i] + j);
1141 }
1142}
1143
1144static void a7xx_get_ahb_gpu_reglist(struct msm_gpu *gpu,
1145 struct a6xx_gpu_state *a6xx_state,
1146 const struct gen7_reg_list *regs,
1147 struct a6xx_gpu_state_obj *obj)
1148{
1149 if (regs->sel)
1150 gpu_write(gpu, regs->sel->host_reg, regs->sel->val);
1151
        a7xx_get_ahb_gpu_registers(gpu, a6xx_state, regs->regs, obj);
1153}
1154
1155/* Read a block of GMU registers */
1156static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
1157 struct a6xx_gpu_state *a6xx_state,
1158 const struct a6xx_registers *regs,
1159 struct a6xx_gpu_state_obj *obj,
1160 bool rscc)
1161{
1162 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1163 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1164 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1165 int i, regcount = 0, index = 0;
1166
1167 for (i = 0; i < regs->count; i += 2)
1168 regcount += RANGE(regs->registers, i);
1169
1170 obj->handle = (const void *) regs;
        obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1172 if (!obj->data)
1173 return;
1174
1175 for (i = 0; i < regs->count; i += 2) {
1176 u32 count = RANGE(regs->registers, i);
1177 int j;
1178
1179 for (j = 0; j < count; j++) {
1180 u32 offset = regs->registers[i] + j;
1181 u32 val;
1182
1183 if (rscc)
1184 val = gmu_read_rscc(gmu, offset);
1185 else
1186 val = gmu_read(gmu, offset);
1187
1188 obj->data[index++] = val;
1189 }
1190 }
1191}
1192
1193static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
1194 struct a6xx_gpu_state *a6xx_state)
1195{
1196 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1197 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1198
1199 a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
                3, sizeof(*a6xx_state->gmu_registers));
1201
1202 if (!a6xx_state->gmu_registers)
1203 return;
1204
1205 a6xx_state->nr_gmu_registers = 3;
1206
1207 /* Get the CX GMU registers from AHB */
        _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
                &a6xx_state->gmu_registers[0], false);
        _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
                &a6xx_state->gmu_registers[1], true);
1212
        if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1214 return;
1215
1216 /* Set the fence to ALLOW mode so we can access the registers */
1217 gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
1218
        _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
                &a6xx_state->gmu_registers[2], false);
1221}
1222
1223static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
1224 struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
1225{
1226 struct msm_gpu_state_bo *snapshot;
1227
1228 if (!bo->size)
1229 return NULL;
1230
1231 snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
1232 if (!snapshot)
1233 return NULL;
1234
1235 snapshot->iova = bo->iova;
1236 snapshot->size = bo->size;
        snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
1238 if (!snapshot->data)
1239 return NULL;
1240
1241 memcpy(snapshot->data, bo->virt, bo->size);
1242
1243 return snapshot;
1244}
1245
1246static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
1247 struct a6xx_gpu_state *a6xx_state)
1248{
1249 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1250 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1251 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1252 unsigned i, j;
1253
1254 BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
1255
1256 for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
1257 struct a6xx_hfi_queue *queue = &gmu->queues[i];
1258 for (j = 0; j < HFI_HISTORY_SZ; j++) {
1259 unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
1260 a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
1261 }
1262 }
1263}
1264
1265#define A6XX_REGLIST_SIZE 1
1266#define A6XX_GBIF_REGLIST_SIZE 1
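/* One slot for the AHB reglist plus one for either the GBIF or VBIF reglist */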
1267static void a6xx_get_registers(struct msm_gpu *gpu,
1268 struct a6xx_gpu_state *a6xx_state,
1269 struct a6xx_crashdumper *dumper)
1270{
1271 int i, count = A6XX_REGLIST_SIZE +
1272 ARRAY_SIZE(a6xx_reglist) +
1273 ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
1274 int index = 0;
1275 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1276
        a6xx_state->registers = state_kcalloc(a6xx_state,
                count, sizeof(*a6xx_state->registers));

        if (!a6xx_state->registers)
                return;

        a6xx_state->nr_registers = count;

        a6xx_get_ahb_gpu_registers(gpu,
                a6xx_state, &a6xx_ahb_reglist,
                &a6xx_state->registers[index++]);

        if (a6xx_has_gbif(adreno_gpu))
                a6xx_get_ahb_gpu_registers(gpu,
                                a6xx_state, &a6xx_gbif_reglist,
                                &a6xx_state->registers[index++]);
        else
                a6xx_get_ahb_gpu_registers(gpu,
                                a6xx_state, &a6xx_vbif_reglist,
                                &a6xx_state->registers[index++]);
1297 if (!dumper) {
1298 /*
1299 * We can't use the crashdumper when the SMMU is stalled,
1300 * because the GPU has no memory access until we resume
1301 * translation (but we don't want to do that until after
1302 * we have captured as much useful GPU state as possible).
1303 * So instead collect registers via the CPU:
1304 */
1305 for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
1306 a6xx_get_ahb_gpu_registers(gpu,
                                a6xx_state, &a6xx_reglist[i],
                                &a6xx_state->registers[index++]);
1309 return;
1310 }
1311
1312 for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
1313 a6xx_get_crashdumper_registers(gpu,
                        a6xx_state, &a6xx_reglist[i],
                        &a6xx_state->registers[index++],
1316 dumper);
1317
1318 for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
1319 a6xx_get_crashdumper_hlsq_registers(gpu,
                        a6xx_state, &a6xx_hlsq_reglist[i],
                        &a6xx_state->registers[index++],
1322 dumper);
1323}
1324
1325#define A7XX_PRE_CRASHDUMPER_SIZE 1
1326#define A7XX_POST_CRASHDUMPER_SIZE 1
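/* One slot each for the pre- and post-crashdumper register dumps */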
1327static void a7xx_get_registers(struct msm_gpu *gpu,
1328 struct a6xx_gpu_state *a6xx_state,
1329 struct a6xx_crashdumper *dumper)
1330{
1331 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1332 int i, count;
1333 int index = 0;
1334 const u32 *pre_crashdumper_regs;
1335 const struct gen7_reg_list *reglist;
1336
        if (adreno_is_a730(adreno_gpu)) {
1338 reglist = gen7_0_0_reg_list;
1339 pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
1340 } else {
1341 BUG_ON(!adreno_is_a740_family(adreno_gpu));
1342 reglist = gen7_2_0_reg_list;
1343 pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
1344 }
1345
1346 count = A7XX_PRE_CRASHDUMPER_SIZE + A7XX_POST_CRASHDUMPER_SIZE;
1347
1348 /* The downstream reglist contains registers in other memory regions
1349 * (cx_misc/cx_mem and cx_dbgc) and we need to plumb through their
1350 * offsets and map them to read them on the CPU. For now only read the
1351 * first region which is the main one.
1352 */
1353 if (dumper) {
1354 for (i = 0; reglist[i].regs; i++)
1355 count++;
1356 } else {
1357 count++;
1358 }
1359
1360 a6xx_state->registers = state_kcalloc(a6xx_state,
                count, sizeof(*a6xx_state->registers));
1362
1363 if (!a6xx_state->registers)
1364 return;
1365
1366 a6xx_state->nr_registers = count;
1367
        a7xx_get_ahb_gpu_registers(gpu, a6xx_state, pre_crashdumper_regs,
                &a6xx_state->registers[index++]);
1370
1371 if (!dumper) {
1372 a7xx_get_ahb_gpu_reglist(gpu,
                        a6xx_state, &reglist[0],
                        &a6xx_state->registers[index++]);
1375 return;
1376 }
1377
1378 for (i = 0; reglist[i].regs; i++)
1379 a7xx_get_crashdumper_registers(gpu,
                        a6xx_state, &reglist[i],
                        &a6xx_state->registers[index++],
1382 dumper);
1383}
1384
1385static void a7xx_get_post_crashdumper_registers(struct msm_gpu *gpu,
1386 struct a6xx_gpu_state *a6xx_state)
1387{
1388 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1389 const u32 *regs;
1390
1391 BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
1392 regs = gen7_0_0_post_crashdumper_registers;
1393
1394 a7xx_get_ahb_gpu_registers(gpu,
1395 a6xx_state, regs,
                &a6xx_state->registers[a6xx_state->nr_registers - 1]);
1397}
1398
static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu)
{
        /*
         * The value at [16:31] is in 4dword units; shifting the register
         * right by 14 both extracts that field and converts it to dwords.
         */
        return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14;
}
1404
1405static u32 a7xx_get_cp_roq_size(struct msm_gpu *gpu)
1406{
1407 /*
1408 * The value at CP_ROQ_THRESHOLDS_2[20:31] is in 4dword units.
1409 * That register however is not directly accessible from APSS on A7xx.
1410 * Program the SQE_UCODE_DBG_ADDR with offset=0x70d3 and read the value.
1411 */
1412 gpu_write(gpu, REG_A6XX_CP_SQE_UCODE_DBG_ADDR, 0x70d3);
1413
1414 return 4 * (gpu_read(gpu, REG_A6XX_CP_SQE_UCODE_DBG_DATA) >> 20);
1415}
1416
1417/* Read a block of data from an indexed register pair */
1418static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
1419 struct a6xx_gpu_state *a6xx_state,
1420 struct a6xx_indexed_registers *indexed,
1421 struct a6xx_gpu_state_obj *obj)
1422{
1423 int i;
1424
1425 obj->handle = (const void *) indexed;
1426 if (indexed->count_fn)
1427 indexed->count = indexed->count_fn(gpu);
1428
        obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
1430 if (!obj->data)
1431 return;
1432
1433 /* All the indexed banks start at address 0 */
1434 gpu_write(gpu, indexed->addr, 0);
1435
1436 /* Read the data - each read increments the internal address by 1 */
1437 for (i = 0; i < indexed->count; i++)
1438 obj->data[i] = gpu_read(gpu, indexed->data);
1439}
1440
1441static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
1442 struct a6xx_gpu_state *a6xx_state)
1443{
1444 u32 mempool_size;
1445 int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
1446 int i;
1447
        a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
                sizeof(*a6xx_state->indexed_regs));
1450 if (!a6xx_state->indexed_regs)
1451 return;
1452
1453 for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
                a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
                        &a6xx_state->indexed_regs[i]);
1456
1457 if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
1458 u32 val;
1459
1460 val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG);
1461 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4);
1462
1463 /* Get the contents of the CP mempool */
                a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
                        &a6xx_state->indexed_regs[i]);
1466
1467 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val);
1468 a6xx_state->nr_indexed_regs = count;
1469 return;
1470 }
1471
1472 /* Set the CP mempool size to 0 to stabilize it while dumping */
1473 mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
1474 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
1475
1476 /* Get the contents of the CP mempool */
        a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
                &a6xx_state->indexed_regs[i]);
1479
1480 /*
1481 * Offset 0x2000 in the mempool is the size - copy the saved size over
1482 * so the data is consistent
1483 */
1484 a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
1485
1486 /* Restore the size in the hardware */
1487 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
1488}
1489
1490static void a7xx_get_indexed_registers(struct msm_gpu *gpu,
1491 struct a6xx_gpu_state *a6xx_state)
1492{
1493 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1494 int i, indexed_count, mempool_count;
1495
1496 BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
1497 indexed_count = ARRAY_SIZE(a7xx_indexed_reglist);
1498 mempool_count = ARRAY_SIZE(a7xx_cp_bv_mempool_indexed);
1499
1500 a6xx_state->indexed_regs = state_kcalloc(a6xx_state,
                indexed_count + mempool_count,
                sizeof(*a6xx_state->indexed_regs));
1503 if (!a6xx_state->indexed_regs)
1504 return;
1505
1506 a6xx_state->nr_indexed_regs = indexed_count + mempool_count;
1507
1508 /* First read the common regs */
1509 for (i = 0; i < indexed_count; i++)
                a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_indexed_reglist[i],
                        &a6xx_state->indexed_regs[i]);
1512
1513 gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, 0, BIT(2));
1514 gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, 0, BIT(2));
1515
1516 /* Get the contents of the CP_BV mempool */
1517 for (i = 0; i < mempool_count; i++)
                a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_cp_bv_mempool_indexed[i],
                        &a6xx_state->indexed_regs[indexed_count + i]);
1520
1521 gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(2), 0);
1522 gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, BIT(2), 0);
1523 return;
1524}
1525
1526struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
1527{
1528 struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
1529 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1530 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
        struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
                GFP_KERNEL);
        bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
                        A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);

        if (!a6xx_state)
                return ERR_PTR(-ENOMEM);

        INIT_LIST_HEAD(&a6xx_state->objs);

        /* Get the generic state from the adreno core */
        adreno_gpu_state_get(gpu, &a6xx_state->base);

        if (!adreno_has_gmu_wrapper(adreno_gpu)) {
                a6xx_get_gmu_registers(gpu, a6xx_state);

                a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
                a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
                a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);

                a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
        }

        /* If GX isn't on the rest of the data isn't going to be accessible */
        if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
                return &a6xx_state->base;

        /* Get the banks of indexed registers */
        if (adreno_is_a7xx(adreno_gpu))
                a7xx_get_indexed_registers(gpu, a6xx_state);
        else
                a6xx_get_indexed_registers(gpu, a6xx_state);
1563
1564 /*
1565 * Try to initialize the crashdumper, if we are not dumping state
1566 * with the SMMU stalled. The crashdumper needs memory access to
1567 * write out GPU state, so we need to skip this when the SMMU is
1568 * stalled in response to an iova fault
1569 */
1570 if (!stalled && !gpu->needs_hw_init &&
            !a6xx_crashdumper_init(gpu, &_dumper)) {
1572 dumper = &_dumper;
1573 }
1574
        if (adreno_is_a7xx(adreno_gpu)) {
1576 a7xx_get_registers(gpu, a6xx_state, dumper);
1577
1578 if (dumper) {
1579 a7xx_get_shaders(gpu, a6xx_state, dumper);
1580 a7xx_get_clusters(gpu, a6xx_state, dumper);
1581 a7xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1582
1583 msm_gem_kernel_put(dumper->bo, gpu->aspace);
1584 }
1585
1586 a7xx_get_post_crashdumper_registers(gpu, a6xx_state);
1587 } else {
1588 a6xx_get_registers(gpu, a6xx_state, dumper);
1589
1590 if (dumper) {
1591 a6xx_get_shaders(gpu, a6xx_state, dumper);
1592 a6xx_get_clusters(gpu, a6xx_state, dumper);
1593 a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1594
1595 msm_gem_kernel_put(dumper->bo, gpu->aspace);
1596 }
1597 }
1598
1599 if (snapshot_debugbus)
1600 a6xx_get_debugbus(gpu, a6xx_state);
1601
1602 a6xx_state->gpu_initialized = !gpu->needs_hw_init;
1603
1604 return &a6xx_state->base;
1605}
1606
1607static void a6xx_gpu_state_destroy(struct kref *kref)
1608{
1609 struct a6xx_state_memobj *obj, *tmp;
1610 struct msm_gpu_state *state = container_of(kref,
1611 struct msm_gpu_state, ref);
1612 struct a6xx_gpu_state *a6xx_state = container_of(state,
1613 struct a6xx_gpu_state, base);
1614
        if (a6xx_state->gmu_log)
                kvfree(a6xx_state->gmu_log->data);

        if (a6xx_state->gmu_hfi)
                kvfree(a6xx_state->gmu_hfi->data);

        if (a6xx_state->gmu_debug)
                kvfree(a6xx_state->gmu_debug->data);

        list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
                list_del(&obj->node);
                kvfree(obj);
        }

        adreno_gpu_state_destroy(state);
        kfree(a6xx_state);
}

int a6xx_gpu_state_put(struct msm_gpu_state *state)
{
        if (IS_ERR_OR_NULL(state))
                return 1;

        return kref_put(&state->ref, a6xx_gpu_state_destroy);
}
1640
1641static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
1642 struct drm_printer *p)
1643{
1644 int i, index = 0;
1645
1646 if (!data)
1647 return;
1648
1649 for (i = 0; i < count; i += 2) {
1650 u32 count = RANGE(registers, i);
1651 u32 offset = registers[i];
1652 int j;
1653
1654 for (j = 0; j < count; index++, offset++, j++) {
1655 if (data[index] == 0xdeafbead)
1656 continue;
1657
1658 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n",
1659 offset << 2, data[index]);
1660 }
1661 }
1662}
1663
1664static void a7xx_show_registers_indented(const u32 *registers, u32 *data,
1665 struct drm_printer *p, unsigned indent)
1666{
1667 int i, index = 0;
1668
1669 for (i = 0; registers[i] != UINT_MAX; i += 2) {
1670 u32 count = RANGE(registers, i);
1671 u32 offset = registers[i];
1672 int j;
1673
1674 for (j = 0; j < count; index++, offset++, j++) {
1675 int k;
1676
1677 if (data[index] == 0xdeafbead)
1678 continue;
1679
1680 for (k = 0; k < indent; k++)
1681 drm_printf(p, " ");
1682 drm_printf(p, "- { offset: 0x%06x, value: 0x%08x }\n",
1683 offset << 2, data[index]);
1684 }
1685 }
1686}
1687
1688static void a7xx_show_registers(const u32 *registers, u32 *data, struct drm_printer *p)
1689{
        a7xx_show_registers_indented(registers, data, p, 1);
1691}
1692
1693static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
1694{
1695 char out[ASCII85_BUFSZ];
1696 long i, l, datalen = 0;
1697
1698 for (i = 0; i < len >> 2; i++) {
1699 if (data[i])
1700 datalen = (i + 1) << 2;
1701 }
1702
1703 if (datalen == 0)
1704 return;
1705
1706 drm_puts(p, " data: !!ascii85 |\n");
1707 drm_puts(p, " ");
1708
1709
        l = ascii85_encode_len(datalen);

        for (i = 0; i < l; i++)
                drm_puts(p, ascii85_encode(data[i], out));
1714
1715 drm_puts(p, "\n");
1716}
1717
1718static void print_name(struct drm_printer *p, const char *fmt, const char *name)
1719{
1720 drm_puts(p, fmt);
1721 drm_puts(p, name);
1722 drm_puts(p, "\n");
1723}
1724
1725static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
1726 struct drm_printer *p)
1727{
1728 const struct a6xx_shader_block *block = obj->handle;
1729 int i;
1730
1731 if (!obj->handle)
1732 return;
1733
        print_name(p, " - type: ", block->name);
1735
1736 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
1737 drm_printf(p, " - bank: %d\n", i);
1738 drm_printf(p, " size: %d\n", block->size);
1739
1740 if (!obj->data)
1741 continue;
1742
                print_ascii85(p, block->size << 2,
                        obj->data + (block->size * i));
1745 }
1746}
1747
1748static void a7xx_show_shader(struct a6xx_gpu_state_obj *obj,
1749 struct drm_printer *p)
1750{
1751 const struct gen7_shader_block *block = obj->handle;
1752 int i, j;
1753 u32 *data = obj->data;
1754
1755 if (!obj->handle)
1756 return;
1757
        print_name(p, " - type: ", a7xx_statetype_names[block->statetype]);
        print_name(p, " - pipe: ", a7xx_pipe_names[block->pipeid]);
1760
1761 for (i = 0; i < block->num_sps; i++) {
1762 drm_printf(p, " - sp: %d\n", i);
1763
1764 for (j = 0; j < block->num_usptps; j++) {
1765 drm_printf(p, " - usptp: %d\n", j);
1766 drm_printf(p, " size: %d\n", block->size);
1767
1768 if (!obj->data)
1769 continue;
1770
                        print_ascii85(p, block->size << 2, data);
1772
1773 data += block->size;
1774 }
1775 }
1776}
1777
1778static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1779 struct drm_printer *p)
1780{
1781 int ctx, index = 0;
1782
1783 for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1784 int j;
1785
1786 drm_printf(p, " - context: %d\n", ctx);
1787
1788 for (j = 0; j < size; j += 2) {
1789 u32 count = RANGE(registers, j);
1790 u32 offset = registers[j];
1791 int k;
1792
1793 for (k = 0; k < count; index++, offset++, k++) {
1794 if (data[index] == 0xdeafbead)
1795 continue;
1796
1797 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n",
1798 offset << 2, data[index]);
1799 }
1800 }
1801 }
1802}
1803
1804static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1805 struct drm_printer *p)
1806{
1807 const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1808
1809 if (dbgahb) {
                print_name(p, " - cluster-name: ", dbgahb->name);
                a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
                        obj->data, p);
1813 }
1814}
1815
1816static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1817 struct drm_printer *p)
1818{
1819 const struct a6xx_cluster *cluster = obj->handle;
1820
1821 if (cluster) {
                print_name(p, " - cluster-name: ", cluster->name);
                a6xx_show_cluster_data(cluster->registers, cluster->count,
                        obj->data, p);
1825 }
1826}
1827
1828static void a7xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1829 struct drm_printer *p)
1830{
1831 const struct gen7_sptp_cluster_registers *dbgahb = obj->handle;
1832
1833 if (dbgahb) {
                print_name(p, " - pipe: ", a7xx_pipe_names[dbgahb->pipe_id]);
                print_name(p, " - cluster-name: ", a7xx_cluster_names[dbgahb->cluster_id]);
                drm_printf(p, " - context: %d\n", dbgahb->context_id);
                a7xx_show_registers_indented(dbgahb->regs, obj->data, p, 4);
1838 }
1839}
1840
1841static void a7xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1842 struct drm_printer *p)
1843{
1844 const struct gen7_cluster_registers *cluster = obj->handle;
1845
1846 if (cluster) {
1847 int context = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0;
1848
                print_name(p, " - pipe: ", a7xx_pipe_names[cluster->pipe_id]);
                print_name(p, " - cluster-name: ", a7xx_cluster_names[cluster->cluster_id]);
                drm_printf(p, " - context: %d\n", context);
                a7xx_show_registers_indented(cluster->regs, obj->data, p, 4);
1853 }
1854}
1855
1856static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1857 struct drm_printer *p)
1858{
1859 const struct a6xx_indexed_registers *indexed = obj->handle;
1860
1861 if (!indexed)
1862 return;
1863
        print_name(p, " - regs-name: ", indexed->name);
        drm_printf(p, " dwords: %d\n", indexed->count);

        print_ascii85(p, indexed->count << 2, obj->data);
1868}
1869
1870static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1871 u32 *data, struct drm_printer *p)
1872{
1873 if (block) {
                print_name(p, " - debugbus-block: ", block->name);
1875
1876 /*
1877 * count for regular debugbus data is in quadwords,
1878 * but print the size in dwords for consistency
1879 */
1880 drm_printf(p, " count: %d\n", block->count << 1);
1881
                print_ascii85(p, block->count << 3, data);
1883 }
1884}
1885
1886static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1887 struct drm_printer *p)
1888{
1889 int i;
1890
1891 for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1892 struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1893
                a6xx_show_debugbus_block(obj->handle, obj->data, p);
1895 }
1896
1897 if (a6xx_state->vbif_debugbus) {
1898 struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1899
1900 drm_puts(p, " - debugbus-block: A6XX_DBGBUS_VBIF\n");
1901 drm_printf(p, " count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1902
1903 /* vbif debugbus data is in dwords. Confusing, huh? */
                print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1905 }
1906
1907 for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1908 struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1909
                a6xx_show_debugbus_block(obj->handle, obj->data, p);
1911 }
1912}
1913
1914void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1915 struct drm_printer *p)
1916{
1917 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1918 struct a6xx_gpu_state *a6xx_state = container_of(state,
1919 struct a6xx_gpu_state, base);
1920 int i;
1921
        if (IS_ERR_OR_NULL(state))
1923 return;
1924
1925 drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);
1926
1927 adreno_show(gpu, state, p);
1928
1929 drm_puts(p, "gmu-log:\n");
1930 if (a6xx_state->gmu_log) {
1931 struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;
1932
1933 drm_printf(p, " iova: 0x%016llx\n", gmu_log->iova);
1934 drm_printf(p, " size: %zu\n", gmu_log->size);
                adreno_show_object(p, &gmu_log->data, gmu_log->size,
                                &gmu_log->encoded);
1937 }
1938
1939 drm_puts(p, "gmu-hfi:\n");
1940 if (a6xx_state->gmu_hfi) {
1941 struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
1942 unsigned i, j;
1943
1944 drm_printf(p, " iova: 0x%016llx\n", gmu_hfi->iova);
1945 drm_printf(p, " size: %zu\n", gmu_hfi->size);
1946 for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
1947 drm_printf(p, " queue-history[%u]:", i);
1948 for (j = 0; j < HFI_HISTORY_SZ; j++) {
1949 drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
1950 }
1951 drm_printf(p, "\n");
1952 }
                adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
                                &gmu_hfi->encoded);
1955 }
1956
1957 drm_puts(p, "gmu-debug:\n");
1958 if (a6xx_state->gmu_debug) {
1959 struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;
1960
1961 drm_printf(p, " iova: 0x%016llx\n", gmu_debug->iova);
1962 drm_printf(p, " size: %zu\n", gmu_debug->size);
                adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
                                &gmu_debug->encoded);
1965 }
1966
1967 drm_puts(p, "registers:\n");
1968 for (i = 0; i < a6xx_state->nr_registers; i++) {
1969 struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
1970
1971 if (!obj->handle)
1972 continue;
1973
                if (adreno_is_a7xx(adreno_gpu)) {
                        a7xx_show_registers(obj->handle, obj->data, p);
                } else {
                        const struct a6xx_registers *regs = obj->handle;

                        a6xx_show_registers(regs->registers, obj->data, regs->count, p);
                }
1981 }
1982
1983 drm_puts(p, "registers-gmu:\n");
1984 for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
1985 struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
1986 const struct a6xx_registers *regs = obj->handle;
1987
1988 if (!obj->handle)
1989 continue;
1990
                a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1992 }
1993
1994 drm_puts(p, "indexed-registers:\n");
1995 for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
                a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
1997
1998 drm_puts(p, "shader-blocks:\n");
1999 for (i = 0; i < a6xx_state->nr_shaders; i++) {
                if (adreno_is_a7xx(adreno_gpu))
                        a7xx_show_shader(&a6xx_state->shaders[i], p);
                else
                        a6xx_show_shader(&a6xx_state->shaders[i], p);
2004 }
2005
2006 drm_puts(p, "clusters:\n");
2007 for (i = 0; i < a6xx_state->nr_clusters; i++) {
                if (adreno_is_a7xx(adreno_gpu))
                        a7xx_show_cluster(&a6xx_state->clusters[i], p);
                else
                        a6xx_show_cluster(&a6xx_state->clusters[i], p);
2012 }
2013
2014 for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) {
                if (adreno_is_a7xx(adreno_gpu))
                        a7xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
                else
                        a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
2019 }
2020
2021 drm_puts(p, "debugbus:\n");
2022 a6xx_show_debugbus(a6xx_state, p);
2023}
2024