Linux-libre 5.3.12-gnu
[librecmc/linux-libre.git] / drivers / gpu / drm / msm / adreno / a6xx_gpu_state.c
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018 The Linux Foundation. All rights reserved. */
3
4 #include <linux/ascii85.h>
5 #include "msm_gem.h"
6 #include "a6xx_gpu.h"
7 #include "a6xx_gmu.h"
8 #include "a6xx_gpu_state.h"
9 #include "a6xx_gmu.xml.h"
10
11 struct a6xx_gpu_state_obj {
12         const void *handle;
13         u32 *data;
14 };
15
16 struct a6xx_gpu_state {
17         struct msm_gpu_state base;
18
19         struct a6xx_gpu_state_obj *gmu_registers;
20         int nr_gmu_registers;
21
22         struct a6xx_gpu_state_obj *registers;
23         int nr_registers;
24
25         struct a6xx_gpu_state_obj *shaders;
26         int nr_shaders;
27
28         struct a6xx_gpu_state_obj *clusters;
29         int nr_clusters;
30
31         struct a6xx_gpu_state_obj *dbgahb_clusters;
32         int nr_dbgahb_clusters;
33
34         struct a6xx_gpu_state_obj *indexed_regs;
35         int nr_indexed_regs;
36
37         struct a6xx_gpu_state_obj *debugbus;
38         int nr_debugbus;
39
40         struct a6xx_gpu_state_obj *vbif_debugbus;
41
42         struct a6xx_gpu_state_obj *cx_debugbus;
43         int nr_cx_debugbus;
44
45         struct list_head objs;
46 };
47
/*
 * Emit a crashdumper script entry that writes @val to register @reg.
 * Returns the number of u64 slots consumed in @in.
 */
static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
{
        const u64 regfield = ((u64) reg) << 44;

        in[0] = val;
        in[1] = regfield | (1 << 21) | 1;

        return 2;
}
55
/*
 * Emit a crashdumper script entry that reads @dwords dwords starting at
 * register @reg into the address @target. Returns the number of u64 slots
 * consumed in @in.
 */
static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
{
        u64 *slot = in;

        *slot++ = target;
        *slot = (((u64) reg) << 44) | dwords;

        return 2;
}
63
/*
 * Terminate a crashdumper script with an all-zero entry. Returns the number
 * of u64 slots consumed in @in.
 */
static inline int CRASHDUMP_FINI(u64 *in)
{
        in[0] = in[1] = 0;

        return 2;
}
71
72 struct a6xx_crashdumper {
73         void *ptr;
74         struct drm_gem_object *bo;
75         u64 iova;
76 };
77
78 struct a6xx_state_memobj {
79         struct list_head node;
80         unsigned long long data[];
81 };
82
83 void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
84 {
85         struct a6xx_state_memobj *obj =
86                 kzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
87
88         if (!obj)
89                 return NULL;
90
91         list_add_tail(&obj->node, &a6xx_state->objs);
92         return &obj->data;
93 }
94
/*
 * Copy @size bytes from @src into a new allocation tracked by @a6xx_state.
 * Returns the copy, or NULL if the allocation failed.
 */
void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
                size_t size)
{
        void *copy = state_kcalloc(a6xx_state, 1, size);

        if (!copy)
                return NULL;

        memcpy(copy, src, size);

        return copy;
}
104
/*
 * The crashdumper scratch region is 1MB: the first 8k hold the script and
 * everything behind that is available for the captured data
 */
#define A6XX_CD_DATA_OFFSET 8192
#define A6XX_CD_DATA_SIZE  (SZ_1M - A6XX_CD_DATA_OFFSET)
111
112 static int a6xx_crashdumper_init(struct msm_gpu *gpu,
113                 struct a6xx_crashdumper *dumper)
114 {
115         dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
116                 SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
117                 &dumper->bo, &dumper->iova);
118
119         if (!IS_ERR(dumper->ptr))
120                 msm_gem_object_set_name(dumper->bo, "crashdump");
121
122         return PTR_ERR_OR_ZERO(dumper->ptr);
123 }
124
125 static int a6xx_crashdumper_run(struct msm_gpu *gpu,
126                 struct a6xx_crashdumper *dumper)
127 {
128         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
129         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
130         u32 val;
131         int ret;
132
133         if (IS_ERR_OR_NULL(dumper->ptr))
134                 return -EINVAL;
135
136         if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
137                 return -EINVAL;
138
139         /* Make sure all pending memory writes are posted */
140         wmb();
141
142         gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
143                 REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
144
145         gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
146
147         ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
148                 val & 0x02, 100, 10000);
149
150         gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
151
152         return ret;
153 }
154
155 /* read a value from the GX debug bus */
156 static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
157                 u32 *data)
158 {
159         u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
160                 A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
161
162         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
163         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
164         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
165         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
166
167         /* Wait 1 us to make sure the data is flowing */
168         udelay(1);
169
170         data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
171         data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
172
173         return 2;
174 }
175
/*
 * Accessors for the CX debug bus mapping: @offset is a dword index that is
 * converted to a byte offset from @ptr
 */
#define cxdbg_write(ptr, offset, val) \
        msm_writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) msm_readl((ptr) + ((offset) << 2))
181
182 /* read a value from the CX debug bus */
183 static int cx_debugbus_read(void *__iomem cxdbg, u32 block, u32 offset,
184                 u32 *data)
185 {
186         u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
187                 A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
188
189         cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
190         cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
191         cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
192         cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
193
194         /* Wait 1 us to make sure the data is flowing */
195         udelay(1);
196
197         data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
198         data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
199
200         return 2;
201 }
202
203 /* Read a chunk of data from the VBIF debug bus */
204 static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
205                 u32 reg, int count, u32 *data)
206 {
207         int i;
208
209         gpu_write(gpu, ctrl0, reg);
210
211         for (i = 0; i < count; i++) {
212                 gpu_write(gpu, ctrl1, i);
213                 data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
214         }
215
216         return count;
217 }
218
/* Number of blocks of each kind read from the VBIF debug bus */
#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

/*
 * Total dwords in a VBIF debug bus capture: 16 per AXI arbiter block, 18 per
 * XIN AXI block and 12 per XIN core block
 */
#define VBIF_DEBUGBUS_BLOCK_SIZE \
        ((AXI_ARB_BLOCKS * 16) + \
         (XIN_AXI_BLOCKS * 18) + \
         (XIN_CORE_BLOCKS * 12))
227
228 static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
229                 struct a6xx_gpu_state *a6xx_state,
230                 struct a6xx_gpu_state_obj *obj)
231 {
232         u32 clk, *ptr;
233         int i;
234
235         obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
236                 sizeof(u32));
237         if (!obj->data)
238                 return;
239
240         obj->handle = NULL;
241
242         /* Get the current clock setting */
243         clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
244
245         /* Force on the bus so we can read it */
246         gpu_write(gpu, REG_A6XX_VBIF_CLKON,
247                 clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
248
249         /* We will read from BUS2 first, so disable BUS1 */
250         gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
251
252         /* Enable the VBIF bus for reading */
253         gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
254
255         ptr = obj->data;
256
257         for (i = 0; i < AXI_ARB_BLOCKS; i++)
258                 ptr += vbif_debugbus_read(gpu,
259                         REG_A6XX_VBIF_TEST_BUS2_CTRL0,
260                         REG_A6XX_VBIF_TEST_BUS2_CTRL1,
261                         1 << (i + 16), 16, ptr);
262
263         for (i = 0; i < XIN_AXI_BLOCKS; i++)
264                 ptr += vbif_debugbus_read(gpu,
265                         REG_A6XX_VBIF_TEST_BUS2_CTRL0,
266                         REG_A6XX_VBIF_TEST_BUS2_CTRL1,
267                         1 << i, 18, ptr);
268
269         /* Stop BUS2 so we can turn on BUS1 */
270         gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);
271
272         for (i = 0; i < XIN_CORE_BLOCKS; i++)
273                 ptr += vbif_debugbus_read(gpu,
274                         REG_A6XX_VBIF_TEST_BUS1_CTRL0,
275                         REG_A6XX_VBIF_TEST_BUS1_CTRL1,
276                         1 << i, 12, ptr);
277
278         /* Restore the VBIF clock setting */
279         gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
280 }
281
282 static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
283                 struct a6xx_gpu_state *a6xx_state,
284                 const struct a6xx_debugbus_block *block,
285                 struct a6xx_gpu_state_obj *obj)
286 {
287         int i;
288         u32 *ptr;
289
290         obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
291         if (!obj->data)
292                 return;
293
294         obj->handle = block;
295
296         for (ptr = obj->data, i = 0; i < block->count; i++)
297                 ptr += debugbus_read(gpu, block->id, i, ptr);
298 }
299
300 static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
301                 struct a6xx_gpu_state *a6xx_state,
302                 const struct a6xx_debugbus_block *block,
303                 struct a6xx_gpu_state_obj *obj)
304 {
305         int i;
306         u32 *ptr;
307
308         obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
309         if (!obj->data)
310                 return;
311
312         obj->handle = block;
313
314         for (ptr = obj->data, i = 0; i < block->count; i++)
315                 ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
316 }
317
318 static void a6xx_get_debugbus(struct msm_gpu *gpu,
319                 struct a6xx_gpu_state *a6xx_state)
320 {
321         struct resource *res;
322         void __iomem *cxdbg = NULL;
323
324         /* Set up the GX debug bus */
325
326         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
327                 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
328
329         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
330                 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
331
332         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
333         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
334         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
335         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
336
337         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
338         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
339
340         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
341         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
342         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
343         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
344
345         /* Set up the CX debug bus - it lives elsewhere in the system so do a
346          * temporary ioremap for the registers
347          */
348         res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
349                         "cx_dbgc");
350
351         if (res)
352                 cxdbg = ioremap(res->start, resource_size(res));
353
354         if (cxdbg) {
355                 cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
356                         A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
357
358                 cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
359                         A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
360
361                 cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
362                 cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
363                 cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
364                 cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
365
366                 cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0,
367                         0x76543210);
368                 cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1,
369                         0xFEDCBA98);
370
371                 cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
372                 cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
373                 cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
374                 cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
375         }
376
377         a6xx_state->debugbus = state_kcalloc(a6xx_state,
378                 ARRAY_SIZE(a6xx_debugbus_blocks),
379                 sizeof(*a6xx_state->debugbus));
380
381         if (a6xx_state->debugbus) {
382                 int i;
383
384                 for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
385                         a6xx_get_debugbus_block(gpu,
386                                 a6xx_state,
387                                 &a6xx_debugbus_blocks[i],
388                                 &a6xx_state->debugbus[i]);
389
390                 a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
391         }
392
393         a6xx_state->vbif_debugbus =
394                 state_kcalloc(a6xx_state, 1,
395                         sizeof(*a6xx_state->vbif_debugbus));
396
397         if (a6xx_state->vbif_debugbus)
398                 a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
399                         a6xx_state->vbif_debugbus);
400
401         if (cxdbg) {
402                 a6xx_state->cx_debugbus =
403                         state_kcalloc(a6xx_state,
404                         ARRAY_SIZE(a6xx_cx_debugbus_blocks),
405                         sizeof(*a6xx_state->cx_debugbus));
406
407                 if (a6xx_state->cx_debugbus) {
408                         int i;
409
410                         for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
411                                 a6xx_get_cx_debugbus_block(cxdbg,
412                                         a6xx_state,
413                                         &a6xx_cx_debugbus_blocks[i],
414                                         &a6xx_state->cx_debugbus[i]);
415
416                         a6xx_state->nr_cx_debugbus =
417                                 ARRAY_SIZE(a6xx_cx_debugbus_blocks);
418                 }
419
420                 iounmap(cxdbg);
421         }
422 }
423
/*
 * Number of registers in the inclusive start/end pair stored at index @a and
 * @a + 1 of the register list @reg
 */
#define RANGE(reg, a) \
        ((reg)[(a) + 1] - (reg)[(a)] + 1)
425
426 /* Read a data cluster from behind the AHB aperture */
427 static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
428                 struct a6xx_gpu_state *a6xx_state,
429                 const struct a6xx_dbgahb_cluster *dbgahb,
430                 struct a6xx_gpu_state_obj *obj,
431                 struct a6xx_crashdumper *dumper)
432 {
433         u64 *in = dumper->ptr;
434         u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
435         size_t datasize;
436         int i, regcount = 0;
437
438         for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
439                 int j;
440
441                 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
442                         (dbgahb->statetype + i * 2) << 8);
443
444                 for (j = 0; j < dbgahb->count; j += 2) {
445                         int count = RANGE(dbgahb->registers, j);
446                         u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
447                                 dbgahb->registers[j] - (dbgahb->base >> 2);
448
449                         in += CRASHDUMP_READ(in, offset, count, out);
450
451                         out += count * sizeof(u32);
452
453                         if (i == 0)
454                                 regcount += count;
455                 }
456         }
457
458         CRASHDUMP_FINI(in);
459
460         datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
461
462         if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
463                 return;
464
465         if (a6xx_crashdumper_run(gpu, dumper))
466                 return;
467
468         obj->handle = dbgahb;
469         obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
470                 datasize);
471 }
472
473 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
474                 struct a6xx_gpu_state *a6xx_state,
475                 struct a6xx_crashdumper *dumper)
476 {
477         int i;
478
479         a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
480                 ARRAY_SIZE(a6xx_dbgahb_clusters),
481                 sizeof(*a6xx_state->dbgahb_clusters));
482
483         if (!a6xx_state->dbgahb_clusters)
484                 return;
485
486         a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
487
488         for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
489                 a6xx_get_dbgahb_cluster(gpu, a6xx_state,
490                         &a6xx_dbgahb_clusters[i],
491                         &a6xx_state->dbgahb_clusters[i], dumper);
492 }
493
494 /* Read a data cluster from the CP aperture with the crashdumper */
495 static void a6xx_get_cluster(struct msm_gpu *gpu,
496                 struct a6xx_gpu_state *a6xx_state,
497                 const struct a6xx_cluster *cluster,
498                 struct a6xx_gpu_state_obj *obj,
499                 struct a6xx_crashdumper *dumper)
500 {
501         u64 *in = dumper->ptr;
502         u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
503         size_t datasize;
504         int i, regcount = 0;
505
506         /* Some clusters need a selector register to be programmed too */
507         if (cluster->sel_reg)
508                 in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
509
510         for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
511                 int j;
512
513                 in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
514                         (cluster->id << 8) | (i << 4) | i);
515
516                 for (j = 0; j < cluster->count; j += 2) {
517                         int count = RANGE(cluster->registers, j);
518
519                         in += CRASHDUMP_READ(in, cluster->registers[j],
520                                 count, out);
521
522                         out += count * sizeof(u32);
523
524                         if (i == 0)
525                                 regcount += count;
526                 }
527         }
528
529         CRASHDUMP_FINI(in);
530
531         datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
532
533         if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
534                 return;
535
536         if (a6xx_crashdumper_run(gpu, dumper))
537                 return;
538
539         obj->handle = cluster;
540         obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
541                 datasize);
542 }
543
544 static void a6xx_get_clusters(struct msm_gpu *gpu,
545                 struct a6xx_gpu_state *a6xx_state,
546                 struct a6xx_crashdumper *dumper)
547 {
548         int i;
549
550         a6xx_state->clusters = state_kcalloc(a6xx_state,
551                 ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
552
553         if (!a6xx_state->clusters)
554                 return;
555
556         a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
557
558         for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
559                 a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
560                         &a6xx_state->clusters[i], dumper);
561 }
562
563 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */
564 static void a6xx_get_shader_block(struct msm_gpu *gpu,
565                 struct a6xx_gpu_state *a6xx_state,
566                 const struct a6xx_shader_block *block,
567                 struct a6xx_gpu_state_obj *obj,
568                 struct a6xx_crashdumper *dumper)
569 {
570         u64 *in = dumper->ptr;
571         size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
572         int i;
573
574         if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
575                 return;
576
577         for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
578                 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
579                         (block->type << 8) | i);
580
581                 in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
582                         block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
583         }
584
585         CRASHDUMP_FINI(in);
586
587         if (a6xx_crashdumper_run(gpu, dumper))
588                 return;
589
590         obj->handle = block;
591         obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
592                 datasize);
593 }
594
595 static void a6xx_get_shaders(struct msm_gpu *gpu,
596                 struct a6xx_gpu_state *a6xx_state,
597                 struct a6xx_crashdumper *dumper)
598 {
599         int i;
600
601         a6xx_state->shaders = state_kcalloc(a6xx_state,
602                 ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
603
604         if (!a6xx_state->shaders)
605                 return;
606
607         a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
608
609         for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
610                 a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
611                         &a6xx_state->shaders[i], dumper);
612 }
613
614 /* Read registers from behind the HLSQ aperture with the crashdumper */
615 static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
616                 struct a6xx_gpu_state *a6xx_state,
617                 const struct a6xx_registers *regs,
618                 struct a6xx_gpu_state_obj *obj,
619                 struct a6xx_crashdumper *dumper)
620
621 {
622         u64 *in = dumper->ptr;
623         u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
624         int i, regcount = 0;
625
626         in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
627
628         for (i = 0; i < regs->count; i += 2) {
629                 u32 count = RANGE(regs->registers, i);
630                 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
631                         regs->registers[i] - (regs->val0 >> 2);
632
633                 in += CRASHDUMP_READ(in, offset, count, out);
634
635                 out += count * sizeof(u32);
636                 regcount += count;
637         }
638
639         CRASHDUMP_FINI(in);
640
641         if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
642                 return;
643
644         if (a6xx_crashdumper_run(gpu, dumper))
645                 return;
646
647         obj->handle = regs;
648         obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
649                 regcount * sizeof(u32));
650 }
651
652 /* Read a block of registers using the crashdumper */
653 static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
654                 struct a6xx_gpu_state *a6xx_state,
655                 const struct a6xx_registers *regs,
656                 struct a6xx_gpu_state_obj *obj,
657                 struct a6xx_crashdumper *dumper)
658
659 {
660         u64 *in = dumper->ptr;
661         u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
662         int i, regcount = 0;
663
664         /* Some blocks might need to program a selector register first */
665         if (regs->val0)
666                 in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
667
668         for (i = 0; i < regs->count; i += 2) {
669                 u32 count = RANGE(regs->registers, i);
670
671                 in += CRASHDUMP_READ(in, regs->registers[i], count, out);
672
673                 out += count * sizeof(u32);
674                 regcount += count;
675         }
676
677         CRASHDUMP_FINI(in);
678
679         if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
680                 return;
681
682         if (a6xx_crashdumper_run(gpu, dumper))
683                 return;
684
685         obj->handle = regs;
686         obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
687                 regcount * sizeof(u32));
688 }
689
690 /* Read a block of registers via AHB */
691 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
692                 struct a6xx_gpu_state *a6xx_state,
693                 const struct a6xx_registers *regs,
694                 struct a6xx_gpu_state_obj *obj)
695 {
696         int i, regcount = 0, index = 0;
697
698         for (i = 0; i < regs->count; i += 2)
699                 regcount += RANGE(regs->registers, i);
700
701         obj->handle = (const void *) regs;
702         obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
703         if (!obj->data)
704                 return;
705
706         for (i = 0; i < regs->count; i += 2) {
707                 u32 count = RANGE(regs->registers, i);
708                 int j;
709
710                 for (j = 0; j < count; j++)
711                         obj->data[index++] = gpu_read(gpu,
712                                 regs->registers[i] + j);
713         }
714 }
715
716 /* Read a block of GMU registers */
717 static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
718                 struct a6xx_gpu_state *a6xx_state,
719                 const struct a6xx_registers *regs,
720                 struct a6xx_gpu_state_obj *obj)
721 {
722         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
723         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
724         struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
725         int i, regcount = 0, index = 0;
726
727         for (i = 0; i < regs->count; i += 2)
728                 regcount += RANGE(regs->registers, i);
729
730         obj->handle = (const void *) regs;
731         obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
732         if (!obj->data)
733                 return;
734
735         for (i = 0; i < regs->count; i += 2) {
736                 u32 count = RANGE(regs->registers, i);
737                 int j;
738
739                 for (j = 0; j < count; j++)
740                         obj->data[index++] = gmu_read(gmu,
741                                 regs->registers[i] + j);
742         }
743 }
744
745 static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
746                 struct a6xx_gpu_state *a6xx_state)
747 {
748         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
749         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
750
751         a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
752                 2, sizeof(*a6xx_state->gmu_registers));
753
754         if (!a6xx_state->gmu_registers)
755                 return;
756
757         a6xx_state->nr_gmu_registers = 2;
758
759         /* Get the CX GMU registers from AHB */
760         _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
761                 &a6xx_state->gmu_registers[0]);
762
763         if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
764                 return;
765
766         /* Set the fence to ALLOW mode so we can access the registers */
767         gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
768
769         _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
770                 &a6xx_state->gmu_registers[1]);
771 }
772
773 static void a6xx_get_registers(struct msm_gpu *gpu,
774                 struct a6xx_gpu_state *a6xx_state,
775                 struct a6xx_crashdumper *dumper)
776 {
777         int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
778                 ARRAY_SIZE(a6xx_reglist) +
779                 ARRAY_SIZE(a6xx_hlsq_reglist);
780         int index = 0;
781
782         a6xx_state->registers = state_kcalloc(a6xx_state,
783                 count, sizeof(*a6xx_state->registers));
784
785         if (!a6xx_state->registers)
786                 return;
787
788         a6xx_state->nr_registers = count;
789
790         for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
791                 a6xx_get_ahb_gpu_registers(gpu,
792                         a6xx_state, &a6xx_ahb_reglist[i],
793                         &a6xx_state->registers[index++]);
794
795         for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
796                 a6xx_get_crashdumper_registers(gpu,
797                         a6xx_state, &a6xx_reglist[i],
798                         &a6xx_state->registers[index++],
799                         dumper);
800
801         for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
802                 a6xx_get_crashdumper_hlsq_registers(gpu,
803                         a6xx_state, &a6xx_hlsq_reglist[i],
804                         &a6xx_state->registers[index++],
805                         dumper);
806 }
807
808 /* Read a block of data from an indexed register pair */
809 static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
810                 struct a6xx_gpu_state *a6xx_state,
811                 const struct a6xx_indexed_registers *indexed,
812                 struct a6xx_gpu_state_obj *obj)
813 {
814         int i;
815
816         obj->handle = (const void *) indexed;
817         obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
818         if (!obj->data)
819                 return;
820
821         /* All the indexed banks start at address 0 */
822         gpu_write(gpu, indexed->addr, 0);
823
824         /* Read the data - each read increments the internal address by 1 */
825         for (i = 0; i < indexed->count; i++)
826                 obj->data[i] = gpu_read(gpu, indexed->data);
827 }
828
829 static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
830                 struct a6xx_gpu_state *a6xx_state)
831 {
832         u32 mempool_size;
833         int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
834         int i;
835
836         a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
837                 sizeof(a6xx_state->indexed_regs));
838         if (!a6xx_state->indexed_regs)
839                 return;
840
841         for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
842                 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
843                         &a6xx_state->indexed_regs[i]);
844
845         /* Set the CP mempool size to 0 to stabilize it while dumping */
846         mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
847         gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
848
849         /* Get the contents of the CP mempool */
850         a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
851                 &a6xx_state->indexed_regs[i]);
852
853         /*
854          * Offset 0x2000 in the mempool is the size - copy the saved size over
855          * so the data is consistent
856          */
857         a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
858
859         /* Restore the size in the hardware */
860         gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
861
862         a6xx_state->nr_indexed_regs = count;
863 }
864
865 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
866 {
867         struct a6xx_crashdumper dumper = { 0 };
868         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
869         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
870         struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
871                 GFP_KERNEL);
872
873         if (!a6xx_state)
874                 return ERR_PTR(-ENOMEM);
875
876         INIT_LIST_HEAD(&a6xx_state->objs);
877
878         /* Get the generic state from the adreno core */
879         adreno_gpu_state_get(gpu, &a6xx_state->base);
880
881         a6xx_get_gmu_registers(gpu, a6xx_state);
882
883         /* If GX isn't on the rest of the data isn't going to be accessible */
884         if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
885                 return &a6xx_state->base;
886
887         /* Get the banks of indexed registers */
888         a6xx_get_indexed_registers(gpu, a6xx_state);
889
890         /* Try to initialize the crashdumper */
891         if (!a6xx_crashdumper_init(gpu, &dumper)) {
892                 a6xx_get_registers(gpu, a6xx_state, &dumper);
893                 a6xx_get_shaders(gpu, a6xx_state, &dumper);
894                 a6xx_get_clusters(gpu, a6xx_state, &dumper);
895                 a6xx_get_dbgahb_clusters(gpu, a6xx_state, &dumper);
896
897                 msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
898         }
899
900         a6xx_get_debugbus(gpu, a6xx_state);
901
902         return  &a6xx_state->base;
903 }
904
905 void a6xx_gpu_state_destroy(struct kref *kref)
906 {
907         struct a6xx_state_memobj *obj, *tmp;
908         struct msm_gpu_state *state = container_of(kref,
909                         struct msm_gpu_state, ref);
910         struct a6xx_gpu_state *a6xx_state = container_of(state,
911                         struct a6xx_gpu_state, base);
912
913         list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node)
914                 kfree(obj);
915
916         adreno_gpu_state_destroy(state);
917         kfree(a6xx_state);
918 }
919
920 int a6xx_gpu_state_put(struct msm_gpu_state *state)
921 {
922         if (IS_ERR_OR_NULL(state))
923                 return 1;
924
925         return kref_put(&state->ref, a6xx_gpu_state_destroy);
926 }
927
928 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
929                 struct drm_printer *p)
930 {
931         int i, index = 0;
932
933         if (!data)
934                 return;
935
936         for (i = 0; i < count; i += 2) {
937                 u32 count = RANGE(registers, i);
938                 u32 offset = registers[i];
939                 int j;
940
941                 for (j = 0; j < count; index++, offset++, j++) {
942                         if (data[index] == 0xdeafbead)
943                                 continue;
944
945                         drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
946                                 offset << 2, data[index]);
947                 }
948         }
949 }
950
951 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
952 {
953         char out[ASCII85_BUFSZ];
954         long i, l, datalen = 0;
955
956         for (i = 0; i < len >> 2; i++) {
957                 if (data[i])
958                         datalen = (i + 1) << 2;
959         }
960
961         if (datalen == 0)
962                 return;
963
964         drm_puts(p, "    data: !!ascii85 |\n");
965         drm_puts(p, "      ");
966
967
968         l = ascii85_encode_len(datalen);
969
970         for (i = 0; i < l; i++)
971                 drm_puts(p, ascii85_encode(data[i], out));
972
973         drm_puts(p, "\n");
974 }
975
/*
 * Print @fmt, then @name, then a newline.
 *
 * Despite its name @fmt is not interpreted as a format string: both strings
 * are emitted verbatim with drm_puts().
 */
static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
	const char *pieces[] = { fmt, name, "\n" };
	unsigned int i;

	for (i = 0; i < sizeof(pieces) / sizeof(pieces[0]); i++)
		drm_puts(p, pieces[i]);
}
982
983 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
984                 struct drm_printer *p)
985 {
986         const struct a6xx_shader_block *block = obj->handle;
987         int i;
988
989         if (!obj->handle)
990                 return;
991
992         print_name(p, "  - type: ", block->name);
993
994         for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
995                 drm_printf(p, "    - bank: %d\n", i);
996                 drm_printf(p, "      size: %d\n", block->size);
997
998                 if (!obj->data)
999                         continue;
1000
1001                 print_ascii85(p, block->size << 2,
1002                         obj->data + (block->size * i));
1003         }
1004 }
1005
1006 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1007                 struct drm_printer *p)
1008 {
1009         int ctx, index = 0;
1010
1011         for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1012                 int j;
1013
1014                 drm_printf(p, "    - context: %d\n", ctx);
1015
1016                 for (j = 0; j < size; j += 2) {
1017                         u32 count = RANGE(registers, j);
1018                         u32 offset = registers[j];
1019                         int k;
1020
1021                         for (k = 0; k < count; index++, offset++, k++) {
1022                                 if (data[index] == 0xdeafbead)
1023                                         continue;
1024
1025                                 drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
1026                                         offset << 2, data[index]);
1027                         }
1028                 }
1029         }
1030 }
1031
1032 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1033                 struct drm_printer *p)
1034 {
1035         const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1036
1037         if (dbgahb) {
1038                 print_name(p, "  - cluster-name: ", dbgahb->name);
1039                 a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
1040                         obj->data, p);
1041         }
1042 }
1043
1044 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1045                 struct drm_printer *p)
1046 {
1047         const struct a6xx_cluster *cluster = obj->handle;
1048
1049         if (cluster) {
1050                 print_name(p, "  - cluster-name: ", cluster->name);
1051                 a6xx_show_cluster_data(cluster->registers, cluster->count,
1052                         obj->data, p);
1053         }
1054 }
1055
1056 static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1057                 struct drm_printer *p)
1058 {
1059         const struct a6xx_indexed_registers *indexed = obj->handle;
1060
1061         if (!indexed)
1062                 return;
1063
1064         print_name(p, "  - regs-name: ", indexed->name);
1065         drm_printf(p, "    dwords: %d\n", indexed->count);
1066
1067         print_ascii85(p, indexed->count << 2, obj->data);
1068 }
1069
1070 static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1071                 u32 *data, struct drm_printer *p)
1072 {
1073         if (block) {
1074                 print_name(p, "  - debugbus-block: ", block->name);
1075
1076                 /*
1077                  * count for regular debugbus data is in quadwords,
1078                  * but print the size in dwords for consistency
1079                  */
1080                 drm_printf(p, "    count: %d\n", block->count << 1);
1081
1082                 print_ascii85(p, block->count << 3, data);
1083         }
1084 }
1085
1086 static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1087                 struct drm_printer *p)
1088 {
1089         int i;
1090
1091         for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1092                 struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1093
1094                 a6xx_show_debugbus_block(obj->handle, obj->data, p);
1095         }
1096
1097         if (a6xx_state->vbif_debugbus) {
1098                 struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1099
1100                 drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
1101                 drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1102
1103                 /* vbif debugbus data is in dwords.  Confusing, huh? */
1104                 print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1105         }
1106
1107         for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1108                 struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1109
1110                 a6xx_show_debugbus_block(obj->handle, obj->data, p);
1111         }
1112 }
1113
1114 void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1115                 struct drm_printer *p)
1116 {
1117         struct a6xx_gpu_state *a6xx_state = container_of(state,
1118                         struct a6xx_gpu_state, base);
1119         int i;
1120
1121         if (IS_ERR_OR_NULL(state))
1122                 return;
1123
1124         adreno_show(gpu, state, p);
1125
1126         drm_puts(p, "registers:\n");
1127         for (i = 0; i < a6xx_state->nr_registers; i++) {
1128                 struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
1129                 const struct a6xx_registers *regs = obj->handle;
1130
1131                 if (!obj->handle)
1132                         continue;
1133
1134                 a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1135         }
1136
1137         drm_puts(p, "registers-gmu:\n");
1138         for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
1139                 struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
1140                 const struct a6xx_registers *regs = obj->handle;
1141
1142                 if (!obj->handle)
1143                         continue;
1144
1145                 a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1146         }
1147
1148         drm_puts(p, "indexed-registers:\n");
1149         for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
1150                 a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
1151
1152         drm_puts(p, "shader-blocks:\n");
1153         for (i = 0; i < a6xx_state->nr_shaders; i++)
1154                 a6xx_show_shader(&a6xx_state->shaders[i], p);
1155
1156         drm_puts(p, "clusters:\n");
1157         for (i = 0; i < a6xx_state->nr_clusters; i++)
1158                 a6xx_show_cluster(&a6xx_state->clusters[i], p);
1159
1160         for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
1161                 a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
1162
1163         drm_puts(p, "debugbus:\n");
1164         a6xx_show_debugbus(a6xx_state, p);
1165 }