/* Linux-libre 5.7.3-gnu — drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "vi.h"
33 #include "vi_structs.h"
34 #include "vid.h"
35 #include "amdgpu_ucode.h"
36 #include "amdgpu_atombios.h"
37 #include "atombios_i2c.h"
38 #include "clearstate_vi.h"
39
40 #include "gmc/gmc_8_2_d.h"
41 #include "gmc/gmc_8_2_sh_mask.h"
42
43 #include "oss/oss_3_0_d.h"
44 #include "oss/oss_3_0_sh_mask.h"
45
46 #include "bif/bif_5_0_d.h"
47 #include "bif/bif_5_0_sh_mask.h"
48 #include "gca/gfx_8_0_d.h"
49 #include "gca/gfx_8_0_enum.h"
50 #include "gca/gfx_8_0_sh_mask.h"
51
52 #include "dce/dce_10_0_d.h"
53 #include "dce/dce_10_0_sh_mask.h"
54
55 #include "smu/smu_7_1_3_d.h"
56
57 #include "ivsrcid/ivsrcid_vislands30.h"
58
/* GFX v8 has a single graphics ring; each MEC pipe gets a 4 KiB HPD (HQD
 * persistent data) region in the per-queue doorbell/backing buffer.
 */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 4096

/* Per-ASIC golden values programmed into mmGB_ADDR_CONFIG (memory/tiling
 * address configuration). Topaz and Carrizo share the same layout.
 */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
66
/* Field-packing helpers for building mmGB_TILE_MODEn / mmGB_MACROTILE_MODEn
 * register values: each shifts a field value into its hardware bit position
 * using the shift constants from the gca/gfx_8_0 register headers.
 */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
76
/* Bit masks for mmRLC_CGTT_MGCG_OVERRIDE (clock-gating override control);
 * defined locally as they are not in the imported register headers.
 */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
83
/* BPM SERDES CMD: set/clear command codes used when shifting clock-gating
 * state through the per-CU BPM serdes chain.
 */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address: indices of the virtual BPM registers addressable
 * through the serdes interface.
 */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX            /* Number of BPM registers (sentinel) */
};

/* Entry count of the RLC firmware's direct register list format. */
#define RLC_FormatDirectRegListLength        14
99
/*(DEBLOBBED)*/ /* MODULE_FIRMWARE() declarations removed by the GNU Linux-libre deblobbing scripts */
101
/* Per-VMID GDS register offsets: one {base, size, gws, oa} tuple for each of
 * the 16 VMIDs, indexed by VMID number.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
121
/* Tonga A11 golden register settings: {register, mask, value} triplets
 * consumed by amdgpu_device_program_register_sequence().
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
141
/* Tonga common golden settings ({register, mask, value} triplets), applied
 * to all Tonga revisions.
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
153
/* Tonga MGCG/CGCG (clock-gating) init sequence: {register, mask, value}
 * triplets. The mmGRBM_GFX_INDEX writes of 0xe0000000 appear to select
 * broadcast mode so the following writes hit all SEs/SHs — NOTE(review):
 * semantics inferred from the register name, confirm against GRBM docs.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* Per-CU CGTS control blocks (CU0..CU7), identical pattern per CU. */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
232
/* VegaM A11 golden register settings: {register, mask, value} triplets. */
static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
253
/* VegaM common golden settings ({register, mask, value} triplets). */
static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
263
/* Polaris11 A11 golden register settings: {register, mask, value} triplets. */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
284
/* Polaris11 common golden settings ({register, mask, value} triplets). */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
294
/* Polaris10 A11 golden register settings: {register, mask, value} triplets. */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
315
/* Polaris10 common golden settings ({register, mask, value} triplets). */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
327
/* Fiji common golden settings ({register, mask, value} triplets). */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
341
/* Fiji A10 golden register settings: {register, mask, value} triplets. */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
356
/* Fiji MGCG/CGCG (clock-gating) init sequence: {register, mask, value}
 * triplets; same overall shape as the Tonga sequence but without per-CU
 * CGTS entries.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
395
/* Iceland (Topaz) A11 golden register settings: {register, mask, value}
 * triplets.
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
415
/* Iceland (Topaz) common golden settings ({register, mask, value} triplets). */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
427
/* Iceland (Topaz) MGCG/CGCG (clock-gating) init sequence: {register, mask,
 * value} triplets; includes per-CU CGTS entries for CU0..CU5 only.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
495
/* Carrizo A11 golden register settings: {register, mask, value} triplets. */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
511
/* Carrizo common golden settings ({register, mask, value} triplets). */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
523
/* Carrizo MGCG/CGCG (clock-gating) init sequence: {register, mask, value}
 * triplets; includes per-CU CGTS entries for CU0..CU7.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
602
/* Stoney A11 golden register settings: {register, mask, value} triplets. */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
616
/* Stoney common golden settings ({register, mask, value} triplets). */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
628
/*
 * Stoney clockgating (MGCG/CGCG) init values: {register, mask, value}
 * triples, programmed first in gfx_v8_0_init_golden_registers() for
 * CHIP_STONEY.
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
637
638
/*
 * Human-readable descriptions of SQ EDC (error detection and correction)
 * error sources, indexed by the hardware's SQ_EDC_INFO source field.
 * Keep the order in sync with the SQ_EDC_INFO_SOURCE_* encoding.
 */
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
648
/* Forward declarations for helpers defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
657
/*
 * gfx_v8_0_init_golden_registers - apply per-ASIC golden register settings
 * @adev: amdgpu device pointer
 *
 * Programs the ASIC-specific register tables defined above via
 * amdgpu_device_program_register_sequence().  For each ASIC the tables are
 * applied in a fixed order (clockgating init, then golden settings, then the
 * common table); keep that ordering intact when modifying this function.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		/* VegaM has no separate mgcg/cgcg init table. */
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/*
		 * Board-specific workaround: on certain Polaris10 SKUs
		 * (matched by PCI revision + subsystem IDs) issue atombios
		 * i2c transactions — presumably to retune the board's
		 * voltage/memory controller; exact register meaning is
		 * board-vendor specific.
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		/* Other ASICs handled by other gfx IP blocks; nothing to do. */
		break;
	}
}
754
755 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
756 {
757         adev->gfx.scratch.num_reg = 8;
758         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
759         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
760 }
761
762 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
763 {
764         struct amdgpu_device *adev = ring->adev;
765         uint32_t scratch;
766         uint32_t tmp = 0;
767         unsigned i;
768         int r;
769
770         r = amdgpu_gfx_scratch_get(adev, &scratch);
771         if (r)
772                 return r;
773
774         WREG32(scratch, 0xCAFEDEAD);
775         r = amdgpu_ring_alloc(ring, 3);
776         if (r)
777                 goto error_free_scratch;
778
779         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
780         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
781         amdgpu_ring_write(ring, 0xDEADBEEF);
782         amdgpu_ring_commit(ring);
783
784         for (i = 0; i < adev->usec_timeout; i++) {
785                 tmp = RREG32(scratch);
786                 if (tmp == 0xDEADBEEF)
787                         break;
788                 udelay(1);
789         }
790
791         if (i >= adev->usec_timeout)
792                 r = -ETIMEDOUT;
793
794 error_free_scratch:
795         amdgpu_gfx_scratch_free(adev, scratch);
796         return r;
797 }
798
/*
 * gfx_v8_0_ring_test_ib - test indirect buffer submission on a ring
 * @ring: ring to test
 * @timeout: fence wait timeout in jiffies (passed to dma_fence_wait_timeout)
 *
 * Allocates a writeback slot, seeds it with 0xCAFEDEAD, then submits a
 * small IB containing a WRITE_DATA packet that stores 0xDEADBEEF to that
 * slot.  Waits for the submission fence and verifies the memory write.
 *
 * Returns 0 on success, -ETIMEDOUT if the fence never signalled, -EINVAL
 * if the fence signalled but the writeback value is wrong, or a negative
 * error from allocation/scheduling.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	/* Writeback slots are 32-bit, hence index * 4 bytes. */
	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	/* WRITE_DATA to memory (DST_SEL=5), confirmed, 64-bit address. */
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	/* >0: signalled, 0: timed out, <0: error. */
	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}
853
854
855 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
856 {
857         release_firmware(adev->gfx.pfp_fw);
858         adev->gfx.pfp_fw = NULL;
859         release_firmware(adev->gfx.me_fw);
860         adev->gfx.me_fw = NULL;
861         release_firmware(adev->gfx.ce_fw);
862         adev->gfx.ce_fw = NULL;
863         release_firmware(adev->gfx.rlc_fw);
864         adev->gfx.rlc_fw = NULL;
865         release_firmware(adev->gfx.mec_fw);
866         adev->gfx.mec_fw = NULL;
867         if ((adev->asic_type != CHIP_STONEY) &&
868             (adev->asic_type != CHIP_TOPAZ))
869                 release_firmware(adev->gfx.mec2_fw);
870         adev->gfx.mec2_fw = NULL;
871
872         kfree(adev->gfx.rlc.register_list_format);
873 }
874
875 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
876 {
877         const char *chip_name;
878         char fw_name[30];
879         int err;
880         struct amdgpu_firmware_info *info = NULL;
881         const struct common_firmware_header *header = NULL;
882         const struct gfx_firmware_header_v1_0 *cp_hdr;
883         const struct rlc_firmware_header_v2_0 *rlc_hdr;
884         unsigned int *tmp = NULL, i;
885
886         DRM_DEBUG("\n");
887
888         switch (adev->asic_type) {
889         case CHIP_TOPAZ:
890                 chip_name = "topaz";
891                 break;
892         case CHIP_TONGA:
893                 chip_name = "tonga";
894                 break;
895         case CHIP_CARRIZO:
896                 chip_name = "carrizo";
897                 break;
898         case CHIP_FIJI:
899                 chip_name = "fiji";
900                 break;
901         case CHIP_STONEY:
902                 chip_name = "stoney";
903                 break;
904         case CHIP_POLARIS10:
905                 chip_name = "polaris10";
906                 break;
907         case CHIP_POLARIS11:
908                 chip_name = "polaris11";
909                 break;
910         case CHIP_POLARIS12:
911                 chip_name = "polaris12";
912                 break;
913         case CHIP_VEGAM:
914                 chip_name = "vegam";
915                 break;
916         default:
917                 BUG();
918         }
919
920         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
921                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
922                 err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
923                 if (err == -ENOENT) {
924                         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
925                         err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
926                 }
927         } else {
928                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
929                 err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
930         }
931         if (err)
932                 goto out;
933         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
934         if (err)
935                 goto out;
936         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
937         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
938         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
939
940         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
941                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
942                 err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
943                 if (err == -ENOENT) {
944                         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
945                         err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
946                 }
947         } else {
948                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
949                 err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
950         }
951         if (err)
952                 goto out;
953         err = amdgpu_ucode_validate(adev->gfx.me_fw);
954         if (err)
955                 goto out;
956         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
957         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
958
959         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
960
961         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
962                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
963                 err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
964                 if (err == -ENOENT) {
965                         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
966                         err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
967                 }
968         } else {
969                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
970                 err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
971         }
972         if (err)
973                 goto out;
974         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
975         if (err)
976                 goto out;
977         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
978         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
979         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
980
981         /*
982          * Support for MCBP/Virtualization in combination with chained IBs is
983          * formal released on feature version #46
984          */
985         if (adev->gfx.ce_feature_version >= 46 &&
986             adev->gfx.pfp_feature_version >= 46) {
987                 adev->virt.chained_ib_support = true;
988                 DRM_INFO("Chained IB support enabled!\n");
989         } else
990                 adev->virt.chained_ib_support = false;
991
992         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
993         err = reject_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
994         if (err)
995                 goto out;
996         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
997         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
998         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
999         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1000
1001         adev->gfx.rlc.save_and_restore_offset =
1002                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1003         adev->gfx.rlc.clear_state_descriptor_offset =
1004                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1005         adev->gfx.rlc.avail_scratch_ram_locations =
1006                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1007         adev->gfx.rlc.reg_restore_list_size =
1008                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1009         adev->gfx.rlc.reg_list_format_start =
1010                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1011         adev->gfx.rlc.reg_list_format_separate_start =
1012                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1013         adev->gfx.rlc.starting_offsets_start =
1014                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1015         adev->gfx.rlc.reg_list_format_size_bytes =
1016                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1017         adev->gfx.rlc.reg_list_size_bytes =
1018                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1019
1020         adev->gfx.rlc.register_list_format =
1021                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1022                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1023
1024         if (!adev->gfx.rlc.register_list_format) {
1025                 err = -ENOMEM;
1026                 goto out;
1027         }
1028
1029         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1030                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1031         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1032                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1033
1034         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1035
1036         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1037                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1038         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1039                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1040
1041         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1042                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1043                 err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1044                 if (err == -ENOENT) {
1045                         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1046                         err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1047                 }
1048         } else {
1049                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1050                 err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1051         }
1052         if (err)
1053                 goto out;
1054         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1055         if (err)
1056                 goto out;
1057         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1058         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1059         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1060
1061         if ((adev->asic_type != CHIP_STONEY) &&
1062             (adev->asic_type != CHIP_TOPAZ)) {
1063                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1064                         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1065                         err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1066                         if (err == -ENOENT) {
1067                                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1068                                 err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1069                         }
1070                 } else {
1071                         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1072                         err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1073                 }
1074                 if (!err) {
1075                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1076                         if (err)
1077                                 goto out;
1078                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1079                                 adev->gfx.mec2_fw->data;
1080                         adev->gfx.mec2_fw_version =
1081                                 le32_to_cpu(cp_hdr->header.ucode_version);
1082                         adev->gfx.mec2_feature_version =
1083                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1084                 } else {
1085                         err = 0;
1086                         adev->gfx.mec2_fw = NULL;
1087                 }
1088         }
1089
1090         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1091         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1092         info->fw = adev->gfx.pfp_fw;
1093         header = (const struct common_firmware_header *)info->fw->data;
1094         adev->firmware.fw_size +=
1095                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1096
1097         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1098         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1099         info->fw = adev->gfx.me_fw;
1100         header = (const struct common_firmware_header *)info->fw->data;
1101         adev->firmware.fw_size +=
1102                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1103
1104         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1105         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1106         info->fw = adev->gfx.ce_fw;
1107         header = (const struct common_firmware_header *)info->fw->data;
1108         adev->firmware.fw_size +=
1109                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1110
1111         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1112         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1113         info->fw = adev->gfx.rlc_fw;
1114         header = (const struct common_firmware_header *)info->fw->data;
1115         adev->firmware.fw_size +=
1116                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1117
1118         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1119         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1120         info->fw = adev->gfx.mec_fw;
1121         header = (const struct common_firmware_header *)info->fw->data;
1122         adev->firmware.fw_size +=
1123                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1124
1125         /* we need account JT in */
1126         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1127         adev->firmware.fw_size +=
1128                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1129
1130         if (amdgpu_sriov_vf(adev)) {
1131                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1132                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1133                 info->fw = adev->gfx.mec_fw;
1134                 adev->firmware.fw_size +=
1135                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1136         }
1137
1138         if (adev->gfx.mec2_fw) {
1139                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1140                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1141                 info->fw = adev->gfx.mec2_fw;
1142                 header = (const struct common_firmware_header *)info->fw->data;
1143                 adev->firmware.fw_size +=
1144                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1145         }
1146
1147 out:
1148         if (err) {
1149                 dev_err(adev->dev,
1150                         "gfx8: Failed to load firmware \"%s\"\n",
1151                         fw_name);
1152                 release_firmware(adev->gfx.pfp_fw);
1153                 adev->gfx.pfp_fw = NULL;
1154                 release_firmware(adev->gfx.me_fw);
1155                 adev->gfx.me_fw = NULL;
1156                 release_firmware(adev->gfx.ce_fw);
1157                 adev->gfx.ce_fw = NULL;
1158                 release_firmware(adev->gfx.rlc_fw);
1159                 adev->gfx.rlc_fw = NULL;
1160                 release_firmware(adev->gfx.mec_fw);
1161                 adev->gfx.mec_fw = NULL;
1162                 release_firmware(adev->gfx.mec2_fw);
1163                 adev->gfx.mec2_fw = NULL;
1164         }
1165         return err;
1166 }
1167
/*
 * gfx_v8_0_get_csb_buffer - build the clear-state buffer (CSB) PM4 stream
 * @adev: amdgpu device pointer
 * @buffer: destination (GPU-visible) buffer to fill with packets
 *
 * Emits PREAMBLE begin/end, CONTEXT_CONTROL, all SECT_CONTEXT register
 * extents from adev->gfx.rlc.cs_data, the raster config pair, and a final
 * CLEAR_STATE packet.  The packet order matters to the CP; do not reorder.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* Emit one SET_CONTEXT_REG packet per register extent. */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* Only SECT_CONTEXT sections are expected here. */
				return;
			}
		}
	}

	/* PA_SC_RASTER_CONFIG and PA_SC_RASTER_CONFIG_1 are consecutive. */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1214
1215 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1216 {
1217         if (adev->asic_type == CHIP_CARRIZO)
1218                 return 5;
1219         else
1220                 return 4;
1221 }
1222
/*
 * gfx_v8_0_rlc_init - set up RLC clear-state and CP table resources
 * @adev: amdgpu device pointer
 *
 * Points the RLC at the VI clear-state data, allocates the clear-state
 * buffer, and on Carrizo/Stoney additionally allocates the CP jump
 * table + GDS backup area.  Finally seeds the SPM VMID with 0xf.
 *
 * Returns 0 on success or a negative error from the buffer allocations.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);

	return 0;
}
1253
/* Free the MEC HPD EOP buffer object allocated by gfx_v8_0_mec_init(). */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1258
/*
 * gfx_v8_0_mec_init - allocate and clear the MEC HPD EOP buffer
 * @adev: amdgpu device pointer
 *
 * Claims the compute queues this driver instance owns, then allocates a
 * VRAM buffer large enough for one HPD region per compute ring, zeroes
 * it, and releases the CPU mapping/reservation.
 *
 * Returns 0 on success or a negative error from the BO allocation.
 */
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	/* Create, pin and CPU-map the buffer in one call. */
	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		/* NOTE(review): message says "HDP" but this is the HPD EOP buffer */
		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	/* Drop the CPU mapping and the reservation taken at creation. */
	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
1289
/*
 * Hand-assembled GCN compute shader, stored as raw instruction dwords.
 * Per its name it initializes VGPRs (presumably for the EDC/GPR init
 * path — confirm against the code that dispatches it).  Do not edit the
 * encoded words by hand.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1326
/*
 * Raw GCN shader dwords dispatched (twice, against different CU groups)
 * by gfx_v8_0_do_edc_gpr_workarounds() to initialize a range of SGPRs.
 * NOTE(review): the 0xbexx dwords are presumed s_mov-style SGPR writes -
 * confirm against the GCN3 ISA manual if modifying.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1351
/*
 * (register, value) pairs programmed via SET_SH_REG packets before the
 * VGPR init dispatch in gfx_v8_0_do_edc_gpr_workarounds(); consumed
 * pairwise (even index = register, odd index = value).
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1372
/*
 * (register, value) pairs for the first SGPR init dispatch; differs from
 * sgpr2_init_regs only in the CU mask (SE0 thread mgmt 0x0f vs 0xf0).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1393
/*
 * (register, value) pairs for the second SGPR init dispatch; identical to
 * sgpr1_init_regs except it targets the complementary CU mask (0xf0).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1414
/*
 * EDC SEC/DED error-counter registers; read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1443
/*
 * Carrizo-only EDC GPR workaround: with EDC temporarily disabled, build
 * and submit one IB that runs the VGPR init shader once and the SGPR init
 * shader twice (against complementary CU masks), then re-enable EDC with
 * DED_MODE/PROP_FED set and clear the SEC/DED counters by reading them.
 * NOTE(review): the dispatches presumably prime GPR SRAM state so later
 * EDC reporting is clean - inferred from the function name, not visible
 * here.  Returns 0 on success or a negative IB/fence error.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->sched.ready)
		return 0;

	/* save the current EDC mode and disable EDC during the dispatches */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per dispatch: 3 dwords per reg pair + PGM_LO/HI (4) + dispatch (5)
	 * + CS partial flush (2), all in dwords (*4 for bytes) */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders into the tail of the IB */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 - same shader, complementary CU mask */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* restore the saved EDC mode with DED reporting/propagation enabled */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1606
/*
 * Fill in the per-ASIC gfx configuration (shader engine / CU / backend
 * counts, FIFO sizes) and derive the final GB_ADDR_CONFIG value from the
 * ASIC's golden setting plus the memory row size read back from the
 * memory controller.
 *
 * Returns 0 on success, or the error from amdgpu_atombios_get_gfx_info()
 * on the Polaris/VegaM paths where part of the config comes from atombios.
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* SE/CU/backend counts come from the vbios on Polaris */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
	case CHIP_VEGAM:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFBANK);
	adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFRANKS);

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPU: row size = 4 * 2^(8 + NOOFCOLS) bits, capped at 4KB */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
1814
1815 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1816                                         int mec, int pipe, int queue)
1817 {
1818         int r;
1819         unsigned irq_type;
1820         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1821
1822         ring = &adev->gfx.compute_ring[ring_id];
1823
1824         /* mec0 is me1 */
1825         ring->me = mec + 1;
1826         ring->pipe = pipe;
1827         ring->queue = queue;
1828
1829         ring->ring_obj = NULL;
1830         ring->use_doorbell = true;
1831         ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1832         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1833                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1834         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1835
1836         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1837                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1838                 + ring->pipe;
1839
1840         /* type-2 packets are deprecated on MEC, use type-3 instead */
1841         r = amdgpu_ring_init(adev, ring, 1024,
1842                         &adev->gfx.eop_irq, irq_type);
1843         if (r)
1844                 return r;
1845
1846
1847         return 0;
1848 }
1849
1850 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1851
/*
 * IP-level sw_init for GFX8: register the gfx interrupt sources, load
 * microcode, allocate RLC/MEC BOs, create the gfx and compute rings, and
 * set up KIQ and per-queue MQDs.  Order matters - MEC BOs must exist
 * before compute rings are created, and KIQ before MQD allocation.
 *
 * Returns 0 on success or the first failing step's error code.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* Add CP EDC/ECC irq  */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	/* SQ interrupts. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
			      &adev->gfx.sq_irq);
	if (r) {
		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
		return r;
	}

	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = adev->gfx.rlc.funcs->init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = adev->doorbell_index.gfx_ring0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
1995
/*
 * IP-level sw_fini for GFX8: tear down in roughly the reverse order of
 * gfx_v8_0_sw_init() - rings, MQDs, KIQ, MEC and RLC BOs, microcode.
 * Always returns 0.
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	amdgpu_gfx_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
				&adev->gfx.rlc.clear_state_gpu_addr,
				(void **)&adev->gfx.rlc.cs_ptr);
	/* the RLC cp_table BO only exists on CZ/ST (allocated by rlc init) */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				&adev->gfx.rlc.cp_table_gpu_addr,
				(void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2025
2026 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2027 {
2028         uint32_t *modearray, *mod2array;
2029         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2030         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2031         u32 reg_offset;
2032
2033         modearray = adev->gfx.config.tile_mode_array;
2034         mod2array = adev->gfx.config.macrotile_mode_array;
2035
2036         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2037                 modearray[reg_offset] = 0;
2038
2039         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2040                 mod2array[reg_offset] = 0;
2041
2042         switch (adev->asic_type) {
2043         case CHIP_TOPAZ:
2044                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2045                                 PIPE_CONFIG(ADDR_SURF_P2) |
2046                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2047                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2048                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2049                                 PIPE_CONFIG(ADDR_SURF_P2) |
2050                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2051                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2052                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2053                                 PIPE_CONFIG(ADDR_SURF_P2) |
2054                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2055                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2056                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2057                                 PIPE_CONFIG(ADDR_SURF_P2) |
2058                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2059                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2060                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2061                                 PIPE_CONFIG(ADDR_SURF_P2) |
2062                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2063                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2064                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2065                                 PIPE_CONFIG(ADDR_SURF_P2) |
2066                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2067                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2068                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2069                                 PIPE_CONFIG(ADDR_SURF_P2) |
2070                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2071                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2072                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2073                                 PIPE_CONFIG(ADDR_SURF_P2));
2074                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2075                                 PIPE_CONFIG(ADDR_SURF_P2) |
2076                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2077                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2078                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2079                                  PIPE_CONFIG(ADDR_SURF_P2) |
2080                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2081                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2082                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2083                                  PIPE_CONFIG(ADDR_SURF_P2) |
2084                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2085                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2086                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2087                                  PIPE_CONFIG(ADDR_SURF_P2) |
2088                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2089                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2090                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2091                                  PIPE_CONFIG(ADDR_SURF_P2) |
2092                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2093                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2094                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2095                                  PIPE_CONFIG(ADDR_SURF_P2) |
2096                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2097                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2098                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2099                                  PIPE_CONFIG(ADDR_SURF_P2) |
2100                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2101                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2102                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2103                                  PIPE_CONFIG(ADDR_SURF_P2) |
2104                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2105                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2106                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2107                                  PIPE_CONFIG(ADDR_SURF_P2) |
2108                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2109                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2110                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2111                                  PIPE_CONFIG(ADDR_SURF_P2) |
2112                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2113                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2114                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2115                                  PIPE_CONFIG(ADDR_SURF_P2) |
2116                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2117                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2118                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2119                                  PIPE_CONFIG(ADDR_SURF_P2) |
2120                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2121                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2122                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2123                                  PIPE_CONFIG(ADDR_SURF_P2) |
2124                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2125                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2126                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2127                                  PIPE_CONFIG(ADDR_SURF_P2) |
2128                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2129                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2130                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2131                                  PIPE_CONFIG(ADDR_SURF_P2) |
2132                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2133                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2134                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2135                                  PIPE_CONFIG(ADDR_SURF_P2) |
2136                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2137                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2138                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2139                                  PIPE_CONFIG(ADDR_SURF_P2) |
2140                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2141                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2142                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2143                                  PIPE_CONFIG(ADDR_SURF_P2) |
2144                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2145                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2146
2147                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2148                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2149                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2150                                 NUM_BANKS(ADDR_SURF_8_BANK));
2151                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2152                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2153                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2154                                 NUM_BANKS(ADDR_SURF_8_BANK));
2155                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2156                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2157                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2158                                 NUM_BANKS(ADDR_SURF_8_BANK));
2159                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2160                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2161                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2162                                 NUM_BANKS(ADDR_SURF_8_BANK));
2163                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2164                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2165                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2166                                 NUM_BANKS(ADDR_SURF_8_BANK));
2167                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2169                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2170                                 NUM_BANKS(ADDR_SURF_8_BANK));
2171                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2172                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2173                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2174                                 NUM_BANKS(ADDR_SURF_8_BANK));
2175                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2176                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2177                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2178                                 NUM_BANKS(ADDR_SURF_16_BANK));
2179                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2180                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2181                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2182                                 NUM_BANKS(ADDR_SURF_16_BANK));
2183                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2184                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2185                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2186                                  NUM_BANKS(ADDR_SURF_16_BANK));
2187                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2188                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2189                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2190                                  NUM_BANKS(ADDR_SURF_16_BANK));
2191                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2193                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2194                                  NUM_BANKS(ADDR_SURF_16_BANK));
2195                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2196                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2197                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2198                                  NUM_BANKS(ADDR_SURF_16_BANK));
2199                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2200                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2201                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2202                                  NUM_BANKS(ADDR_SURF_8_BANK));
2203
2204                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2205                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2206                             reg_offset != 23)
2207                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2208
2209                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2210                         if (reg_offset != 7)
2211                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2212
2213                 break;
2214         case CHIP_FIJI:
2215         case CHIP_VEGAM:
2216                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2217                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2218                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2219                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2220                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2221                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2222                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2223                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2224                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2225                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2226                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2227                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2228                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2230                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2231                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2232                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2233                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2234                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2235                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2237                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2238                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2239                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2240                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2241                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2242                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2243                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2244                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2245                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2246                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2247                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2248                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2249                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2250                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2251                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2252                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2253                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2254                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2255                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2256                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2257                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2258                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2259                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2260                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2261                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2262                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2263                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2264                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2265                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2266                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2267                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2268                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2269                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2270                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2271                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2272                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2273                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2274                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2275                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2276                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2278                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2279                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2280                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2281                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2282                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2283                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2284                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2285                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2286                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2287                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2289                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2290                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2291                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2293                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2295                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2297                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2299                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2301                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2302                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2303                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2305                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2307                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2308                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2309                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2311                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2313                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2314                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2315                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2317                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2318                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2319                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2321                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2322                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2323                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2329                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2330                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2331                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2333                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2334                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2335                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2336                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2337                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2338
2339                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2341                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2342                                 NUM_BANKS(ADDR_SURF_8_BANK));
2343                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2345                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2346                                 NUM_BANKS(ADDR_SURF_8_BANK));
2347                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2349                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2350                                 NUM_BANKS(ADDR_SURF_8_BANK));
2351                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2353                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354                                 NUM_BANKS(ADDR_SURF_8_BANK));
2355                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2357                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2358                                 NUM_BANKS(ADDR_SURF_8_BANK));
2359                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2360                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2361                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2362                                 NUM_BANKS(ADDR_SURF_8_BANK));
2363                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2364                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2365                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2366                                 NUM_BANKS(ADDR_SURF_8_BANK));
2367                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2369                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2370                                 NUM_BANKS(ADDR_SURF_8_BANK));
2371                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2373                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2374                                 NUM_BANKS(ADDR_SURF_8_BANK));
2375                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2377                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2378                                  NUM_BANKS(ADDR_SURF_8_BANK));
2379                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2382                                  NUM_BANKS(ADDR_SURF_8_BANK));
2383                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2385                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2386                                  NUM_BANKS(ADDR_SURF_8_BANK));
2387                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2388                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2389                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2390                                  NUM_BANKS(ADDR_SURF_8_BANK));
2391                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2393                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2394                                  NUM_BANKS(ADDR_SURF_4_BANK));
2395
2396                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2397                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2398
2399                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2400                         if (reg_offset != 7)
2401                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2402
2403                 break;
2404         case CHIP_TONGA:
2405                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2407                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2408                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2409                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2410                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2411                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2412                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2413                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2414                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2415                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2416                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2417                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2419                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2420                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2421                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2423                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2424                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2425                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2427                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2428                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2429                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2430                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2431                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2432                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2433                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2434                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2435                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2436                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2437                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2438                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2439                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2440                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2441                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2442                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2443                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2444                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2445                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2446                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2447                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2448                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2449                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2450                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2451                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2452                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2453                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2454                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2455                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2456                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2457                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2458                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2459                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2460                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2461                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2462                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2464                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2465                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2466                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2467                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2468                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2469                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2470                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2471                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2472                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2473                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2474                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2475                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2476                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2477                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2478                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2479                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2480                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2482                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2483                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2484                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2485                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2486                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2487                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2488                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2490                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2491                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2492                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2494                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2495                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2496                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2497                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2498                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2499                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2500                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2502                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2503                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2504                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2506                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2507                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2508                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2510                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2511                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2512                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2513                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2514                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2515                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2517                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2518                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2519                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2520                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2522                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2523                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2524                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2525                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2526                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2527
2528                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2530                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2531                                 NUM_BANKS(ADDR_SURF_16_BANK));
2532                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2534                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2535                                 NUM_BANKS(ADDR_SURF_16_BANK));
2536                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2538                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2539                                 NUM_BANKS(ADDR_SURF_16_BANK));
2540                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2541                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2542                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2543                                 NUM_BANKS(ADDR_SURF_16_BANK));
2544                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2546                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2547                                 NUM_BANKS(ADDR_SURF_16_BANK));
2548                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2550                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2551                                 NUM_BANKS(ADDR_SURF_16_BANK));
2552                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2553                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2554                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2555                                 NUM_BANKS(ADDR_SURF_16_BANK));
2556                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2558                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2559                                 NUM_BANKS(ADDR_SURF_16_BANK));
2560                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2562                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2563                                 NUM_BANKS(ADDR_SURF_16_BANK));
2564                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2565                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2566                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2567                                  NUM_BANKS(ADDR_SURF_16_BANK));
2568                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2570                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2571                                  NUM_BANKS(ADDR_SURF_16_BANK));
2572                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2573                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2574                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2575                                  NUM_BANKS(ADDR_SURF_8_BANK));
2576                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2577                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2578                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2579                                  NUM_BANKS(ADDR_SURF_4_BANK));
2580                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2581                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2582                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2583                                  NUM_BANKS(ADDR_SURF_4_BANK));
2584
2585                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2586                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2587
2588                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2589                         if (reg_offset != 7)
2590                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2591
2592                 break;
2593         case CHIP_POLARIS11:
2594         case CHIP_POLARIS12:
                /*
                 * Tile-mode table for Polaris11/Polaris12.  Every entry uses
                 * the ADDR_SURF_P4_16x16 pipe configuration.  Entries 0-7 are
                 * depth modes distinguished by tile-split size (64B..2KB),
                 * entry 8 is linear-aligned, 9-12 display micro-tiling,
                 * 13-18 thin, 19-26 thick/xthick, 27-30 rotated.
                 */
2595                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2597                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2598                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2599                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2600                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2601                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2602                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2603                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2605                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2606                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2607                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2608                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2609                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2610                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2611                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2612                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2613                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2614                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2615                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2616                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2617                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2618                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2619                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2620                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2621                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2622                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2623                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2624                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2625                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2626                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2627                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2628                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2629                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2630                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2631                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2632                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2633                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2634                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2635                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2636                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2637                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2638                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2639                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2640                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2641                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2642                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2643                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2644                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2645                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2646                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2647                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2648                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2649                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2650                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2651                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2652                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2653                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2654                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2655                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2656                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2658                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2660                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2661                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2662                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2664                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2665                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2666                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2667                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2668                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2669                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2670                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2672                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2673                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2674                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2676                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2677                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2678                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2680                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2681                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2682                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2684                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2685                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2686                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2688                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2689                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2690                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2692                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2693                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2694                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2696                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2697                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2698                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2700                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2701                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2702                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2704                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2705                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2708                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2709                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2710                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2712                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2713                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2714                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2716                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2717
                /*
                 * Macro-tile table: bank width/height, macro-tile aspect
                 * ratio and bank count per macro tile mode.  mod2array[7]
                 * is deliberately never assigned; the write loop below
                 * skips register offset 7 to match.
                 */
2718                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2719                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2720                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2721                                 NUM_BANKS(ADDR_SURF_16_BANK));
2722
2723                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2724                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2725                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2726                                 NUM_BANKS(ADDR_SURF_16_BANK));
2727
2728                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2729                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2730                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2731                                 NUM_BANKS(ADDR_SURF_16_BANK));
2732
2733                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2734                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2735                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2736                                 NUM_BANKS(ADDR_SURF_16_BANK));
2737
2738                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2741                                 NUM_BANKS(ADDR_SURF_16_BANK));
2742
2743                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2745                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2746                                 NUM_BANKS(ADDR_SURF_16_BANK));
2747
2748                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2749                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2750                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2751                                 NUM_BANKS(ADDR_SURF_16_BANK));
2752
2753                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2754                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2755                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2756                                 NUM_BANKS(ADDR_SURF_16_BANK));
2757
2758                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2759                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2760                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2761                                 NUM_BANKS(ADDR_SURF_16_BANK));
2762
2763                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2765                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2766                                 NUM_BANKS(ADDR_SURF_16_BANK));
2767
2768                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2770                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2771                                 NUM_BANKS(ADDR_SURF_16_BANK));
2772
2773                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2774                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2775                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2776                                 NUM_BANKS(ADDR_SURF_16_BANK));
2777
2778                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2779                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2780                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2781                                 NUM_BANKS(ADDR_SURF_8_BANK));
2782
2783                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2784                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2785                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2786                                 NUM_BANKS(ADDR_SURF_4_BANK));
2787
                /* Push both tables to the hardware registers. */
2788                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2789                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2790
                /*
                 * Offset 7 is skipped: mod2array[7] is never set above, so
                 * GB_MACROTILE_MODE7 keeps its reset/default value.
                 */
2791                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2792                         if (reg_offset != 7)
2793                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2794
2795                 break;
2796         case CHIP_POLARIS10:
2797                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2799                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2800                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2801                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2802                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2803                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2804                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2805                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2806                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2807                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2808                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2809                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2810                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2811                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2812                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2813                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2814                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2815                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2816                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2817                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2818                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2819                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2820                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2821                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2822                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2823                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2824                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2825                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2826                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2827                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2828                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2829                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2830                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2831                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2832                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2833                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2834                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2835                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2836                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2837                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2838                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2839                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2840                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2841                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2842                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2843                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2844                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2846                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2847                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2848                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2849                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2850                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2852                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2853                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2854                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2855                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2856                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2857                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2858                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2859                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2860                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2861                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2862                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2863                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2864                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2866                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2867                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2868                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2869                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2870                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2871                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2872                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2873                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2874                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2875                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2876                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2877                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2878                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2879                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2880                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2882                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2883                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2884                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2885                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2886                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2887                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2888                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2889                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2890                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2891                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2892                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2894                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2895                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2896                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2898                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2899                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2900                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2901                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2902                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2903                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2904                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2906                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2907                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2908                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2910                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2911                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2912                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2914                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2915                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2916                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2917                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2918                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2919
2920                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2922                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2923                                 NUM_BANKS(ADDR_SURF_16_BANK));
2924
2925                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2926                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2927                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2928                                 NUM_BANKS(ADDR_SURF_16_BANK));
2929
2930                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2931                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2932                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2933                                 NUM_BANKS(ADDR_SURF_16_BANK));
2934
2935                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2936                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2937                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2938                                 NUM_BANKS(ADDR_SURF_16_BANK));
2939
2940                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2941                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2942                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2943                                 NUM_BANKS(ADDR_SURF_16_BANK));
2944
2945                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2946                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2947                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2948                                 NUM_BANKS(ADDR_SURF_16_BANK));
2949
2950                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2951                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2952                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2953                                 NUM_BANKS(ADDR_SURF_16_BANK));
2954
2955                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2957                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2958                                 NUM_BANKS(ADDR_SURF_16_BANK));
2959
2960                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2961                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2962                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963                                 NUM_BANKS(ADDR_SURF_16_BANK));
2964
2965                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2966                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2967                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2968                                 NUM_BANKS(ADDR_SURF_16_BANK));
2969
2970                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2971                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2972                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2973                                 NUM_BANKS(ADDR_SURF_16_BANK));
2974
2975                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2976                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2977                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2978                                 NUM_BANKS(ADDR_SURF_8_BANK));
2979
2980                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2983                                 NUM_BANKS(ADDR_SURF_4_BANK));
2984
2985                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2986                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2987                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2988                                 NUM_BANKS(ADDR_SURF_4_BANK));
2989
2990                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2991                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2992
2993                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2994                         if (reg_offset != 7)
2995                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2996
2997                 break;
2998         case CHIP_STONEY:
2999                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3000                                 PIPE_CONFIG(ADDR_SURF_P2) |
3001                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3002                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3003                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3004                                 PIPE_CONFIG(ADDR_SURF_P2) |
3005                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3006                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3007                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3008                                 PIPE_CONFIG(ADDR_SURF_P2) |
3009                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3010                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3011                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3012                                 PIPE_CONFIG(ADDR_SURF_P2) |
3013                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3014                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3015                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3016                                 PIPE_CONFIG(ADDR_SURF_P2) |
3017                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3018                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3019                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3020                                 PIPE_CONFIG(ADDR_SURF_P2) |
3021                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3022                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3023                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3024                                 PIPE_CONFIG(ADDR_SURF_P2) |
3025                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3026                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3027                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3028                                 PIPE_CONFIG(ADDR_SURF_P2));
3029                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3030                                 PIPE_CONFIG(ADDR_SURF_P2) |
3031                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3032                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3033                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3034                                  PIPE_CONFIG(ADDR_SURF_P2) |
3035                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3036                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3037                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3038                                  PIPE_CONFIG(ADDR_SURF_P2) |
3039                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3040                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3041                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3042                                  PIPE_CONFIG(ADDR_SURF_P2) |
3043                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3044                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3045                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3046                                  PIPE_CONFIG(ADDR_SURF_P2) |
3047                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3048                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3049                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3050                                  PIPE_CONFIG(ADDR_SURF_P2) |
3051                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3052                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3053                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3054                                  PIPE_CONFIG(ADDR_SURF_P2) |
3055                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3056                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3057                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3058                                  PIPE_CONFIG(ADDR_SURF_P2) |
3059                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3060                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3061                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3062                                  PIPE_CONFIG(ADDR_SURF_P2) |
3063                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3064                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3065                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3066                                  PIPE_CONFIG(ADDR_SURF_P2) |
3067                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3068                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3069                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3070                                  PIPE_CONFIG(ADDR_SURF_P2) |
3071                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3072                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3073                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3074                                  PIPE_CONFIG(ADDR_SURF_P2) |
3075                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3076                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3077                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3078                                  PIPE_CONFIG(ADDR_SURF_P2) |
3079                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3080                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3081                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3082                                  PIPE_CONFIG(ADDR_SURF_P2) |
3083                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3084                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3085                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3086                                  PIPE_CONFIG(ADDR_SURF_P2) |
3087                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3088                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3089                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3090                                  PIPE_CONFIG(ADDR_SURF_P2) |
3091                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3092                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3093                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3094                                  PIPE_CONFIG(ADDR_SURF_P2) |
3095                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3096                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3097                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3098                                  PIPE_CONFIG(ADDR_SURF_P2) |
3099                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3100                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3101
3102                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3103                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3104                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3105                                 NUM_BANKS(ADDR_SURF_8_BANK));
3106                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3107                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3108                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3109                                 NUM_BANKS(ADDR_SURF_8_BANK));
3110                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3111                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3112                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3113                                 NUM_BANKS(ADDR_SURF_8_BANK));
3114                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3115                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3116                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3117                                 NUM_BANKS(ADDR_SURF_8_BANK));
3118                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3119                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3120                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3121                                 NUM_BANKS(ADDR_SURF_8_BANK));
3122                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3123                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3124                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3125                                 NUM_BANKS(ADDR_SURF_8_BANK));
3126                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3127                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3128                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3129                                 NUM_BANKS(ADDR_SURF_8_BANK));
3130                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3131                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3132                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3133                                 NUM_BANKS(ADDR_SURF_16_BANK));
3134                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3135                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3136                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3137                                 NUM_BANKS(ADDR_SURF_16_BANK));
3138                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3139                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3140                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3141                                  NUM_BANKS(ADDR_SURF_16_BANK));
3142                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3143                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3144                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3145                                  NUM_BANKS(ADDR_SURF_16_BANK));
3146                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3147                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3148                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3149                                  NUM_BANKS(ADDR_SURF_16_BANK));
3150                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3151                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3152                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3153                                  NUM_BANKS(ADDR_SURF_16_BANK));
3154                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3155                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3156                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3157                                  NUM_BANKS(ADDR_SURF_8_BANK));
3158
3159                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3160                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3161                             reg_offset != 23)
3162                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3163
3164                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3165                         if (reg_offset != 7)
3166                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3167
3168                 break;
3169         default:
3170                 dev_warn(adev->dev,
3171                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3172                          adev->asic_type);
3173                 /* fall through */
3174
3175         case CHIP_CARRIZO:
3176                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3177                                 PIPE_CONFIG(ADDR_SURF_P2) |
3178                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3179                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3180                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3181                                 PIPE_CONFIG(ADDR_SURF_P2) |
3182                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3183                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3184                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3185                                 PIPE_CONFIG(ADDR_SURF_P2) |
3186                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3187                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3188                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3189                                 PIPE_CONFIG(ADDR_SURF_P2) |
3190                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3191                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3192                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3193                                 PIPE_CONFIG(ADDR_SURF_P2) |
3194                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3195                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3196                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3197                                 PIPE_CONFIG(ADDR_SURF_P2) |
3198                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3199                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3200                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3201                                 PIPE_CONFIG(ADDR_SURF_P2) |
3202                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3203                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3204                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3205                                 PIPE_CONFIG(ADDR_SURF_P2));
3206                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3207                                 PIPE_CONFIG(ADDR_SURF_P2) |
3208                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3209                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3210                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3211                                  PIPE_CONFIG(ADDR_SURF_P2) |
3212                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3213                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3214                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3215                                  PIPE_CONFIG(ADDR_SURF_P2) |
3216                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3217                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3218                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3219                                  PIPE_CONFIG(ADDR_SURF_P2) |
3220                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3221                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3222                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3223                                  PIPE_CONFIG(ADDR_SURF_P2) |
3224                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3225                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3226                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3227                                  PIPE_CONFIG(ADDR_SURF_P2) |
3228                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3229                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3230                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3231                                  PIPE_CONFIG(ADDR_SURF_P2) |
3232                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3233                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3234                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3235                                  PIPE_CONFIG(ADDR_SURF_P2) |
3236                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3237                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3238                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3239                                  PIPE_CONFIG(ADDR_SURF_P2) |
3240                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3241                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3242                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3243                                  PIPE_CONFIG(ADDR_SURF_P2) |
3244                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3245                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3246                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3247                                  PIPE_CONFIG(ADDR_SURF_P2) |
3248                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3249                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3250                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3251                                  PIPE_CONFIG(ADDR_SURF_P2) |
3252                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3253                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3254                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3255                                  PIPE_CONFIG(ADDR_SURF_P2) |
3256                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3257                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3258                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3259                                  PIPE_CONFIG(ADDR_SURF_P2) |
3260                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3261                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3262                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3263                                  PIPE_CONFIG(ADDR_SURF_P2) |
3264                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3265                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3266                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3267                                  PIPE_CONFIG(ADDR_SURF_P2) |
3268                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3269                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3270                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3271                                  PIPE_CONFIG(ADDR_SURF_P2) |
3272                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3273                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3274                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3275                                  PIPE_CONFIG(ADDR_SURF_P2) |
3276                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3277                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3278
3279                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3280                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3281                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3282                                 NUM_BANKS(ADDR_SURF_8_BANK));
3283                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3284                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3285                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3286                                 NUM_BANKS(ADDR_SURF_8_BANK));
3287                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3288                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3289                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3290                                 NUM_BANKS(ADDR_SURF_8_BANK));
3291                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3292                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3293                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3294                                 NUM_BANKS(ADDR_SURF_8_BANK));
3295                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3296                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3297                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3298                                 NUM_BANKS(ADDR_SURF_8_BANK));
3299                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3300                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3301                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3302                                 NUM_BANKS(ADDR_SURF_8_BANK));
3303                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3304                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3305                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3306                                 NUM_BANKS(ADDR_SURF_8_BANK));
3307                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3308                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3309                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3310                                 NUM_BANKS(ADDR_SURF_16_BANK));
3311                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3312                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3313                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3314                                 NUM_BANKS(ADDR_SURF_16_BANK));
3315                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3316                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3317                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3318                                  NUM_BANKS(ADDR_SURF_16_BANK));
3319                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3320                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3321                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3322                                  NUM_BANKS(ADDR_SURF_16_BANK));
3323                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3324                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3325                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3326                                  NUM_BANKS(ADDR_SURF_16_BANK));
3327                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3328                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3329                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3330                                  NUM_BANKS(ADDR_SURF_16_BANK));
3331                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3332                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3333                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3334                                  NUM_BANKS(ADDR_SURF_8_BANK));
3335
3336                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3337                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3338                             reg_offset != 23)
3339                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3340
3341                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3342                         if (reg_offset != 7)
3343                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3344
3345                 break;
3346         }
3347 }
3348
3349 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3350                                   u32 se_num, u32 sh_num, u32 instance)
3351 {
3352         u32 data;
3353
3354         if (instance == 0xffffffff)
3355                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3356         else
3357                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3358
3359         if (se_num == 0xffffffff)
3360                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3361         else
3362                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3363
3364         if (sh_num == 0xffffffff)
3365                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3366         else
3367                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3368
3369         WREG32(mmGRBM_GFX_INDEX, data);
3370 }
3371
/* Select the SRBM me/pipe/queue/vm aperture; thin wrapper around
 * vi_srbm_select() so it can be exposed through the gfx funcs table. */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q, u32 vm)
{
	vi_srbm_select(adev, me, pipe, q, vm);
}
3377
3378 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3379 {
3380         u32 data, mask;
3381
3382         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3383                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3384
3385         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3386
3387         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3388                                          adev->gfx.config.max_sh_per_se);
3389
3390         return (~data) & mask;
3391 }
3392
/*
 * Fill in the default PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 values
 * for the given ASIC.  The values are OR-ed into *rconf/*rconf1 so the
 * caller's zero-initialized words end up with the per-chip defaults.
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_VEGAM:
		/* 4 SE parts: both raster config registers are used. */
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		/* Small parts: single packer map, no SE pairing. */
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		/* Single RB: hardware defaults (all zero) are fine. */
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
3433
/*
 * Rewrite the raster configuration for parts where some render backends
 * have been harvested (fused off).  For each shader engine the RB/PKR/SE
 * mapping fields are steered away from dead backends, then the adjusted
 * value is written with GRBM_GFX_INDEX pointed at that SE.
 *
 * @raster_config/@raster_config_1: the unharvested default values
 * @rb_mask: bitmap of RBs that are actually present
 * @num_rb:  number of RB pipes being configured
 *
 * Caller must hold grbm_idx_mutex (this function reprograms
 * GRBM_GFX_INDEX and restores broadcast mode before returning).
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Per-SE slices of the global RB mask. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If a whole SE pair is dead, remap SE_PAIR_MAP to the live pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* If one SE of the pair is dead, steer SE_MAP to the live one. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same treatment for the packer map within this SE. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* Finally fix up the RB maps inside each packer. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3542
/*
 * Discover which render backends are active, program the raster
 * configuration accordingly (falling back to the harvested path when
 * some RBs are fused off), and cache the per-SE/SH RB registers for
 * userspace queries.  Takes grbm_idx_mutex around all GRBM_GFX_INDEX
 * manipulation.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Collect the active-RB bitmap from every SE/SH. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* All RBs present (or none readable): broadcast the defaults;
	 * otherwise write per-SE harvested configs. */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3599
/**
 * gfx_v8_0_init_compute_vmid - init the SH_MEM registers of the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
/* VMIDs 8-15 are reserved for compute (KFD); 0-7 belong to graphics. */
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* Program the aperture registers for every compute VMID. */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	   access. These should be enabled by FW for target VMIDs. */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
	}
}
3653
/* Clear the GDS/GWS/OA allocations of VMIDs 1-15. */
static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
{
	int vmid;

	/*
	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
	 * the driver can enable them for graphics. VMID0 should maintain
	 * access so that HWS firmware can save/restore entries.
	 */
	for (vmid = 1; vmid < 16; vmid++) {
		WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
		WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
		WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
		WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
	}
}
3671
3672 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3673 {
3674         switch (adev->asic_type) {
3675         default:
3676                 adev->gfx.config.double_offchip_lds_buf = 1;
3677                 break;
3678         case CHIP_CARRIZO:
3679         case CHIP_STONEY:
3680                 adev->gfx.config.double_offchip_lds_buf = 0;
3681                 break;
3682         }
3683 }
3684
/*
 * One-time golden-register / constants initialization for gfx8:
 * address config, tiling tables, RB setup, per-VMID SH_MEM apertures
 * and the broadcast PA_SC/SPI settings.
 */
static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	/* Program SH_MEM_* for every VMID; VMID 0 (kernel) differs from
	 * the user VMIDs in MTYPE and base. */
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			/* Upper 16 bits of the shared aperture base. */
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);
	gfx_v8_0_init_gds_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3767
/*
 * Poll until the RLC serdes masters are idle: first the per-CU masters
 * on every SE/SH, then the non-CU (SE/GC/TC) masters.  Logs and bails
 * out if a CU master does not go idle within adev->usec_timeout.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* Restore broadcast mode before giving up. */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Now wait for the global (non-CU) serdes masters. */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3805
3806 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3807                                                bool enable)
3808 {
3809         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3810
3811         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3812         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3813         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3814         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3815
3816         WREG32(mmCP_INT_CNTL_RING0, tmp);
3817 }
3818
/* Build the clear-state buffer and tell the RLC where it lives (CSIB). */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}
3830
3831 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3832                                 int ind_offset,
3833                                 int list_size,
3834                                 int *unique_indices,
3835                                 int *indices_count,
3836                                 int max_indices,
3837                                 int *ind_start_offsets,
3838                                 int *offset_count,
3839                                 int max_offset)
3840 {
3841         int indices;
3842         bool new_entry = true;
3843
3844         for (; ind_offset < list_size; ind_offset++) {
3845
3846                 if (new_entry) {
3847                         new_entry = false;
3848                         ind_start_offsets[*offset_count] = ind_offset;
3849                         *offset_count = *offset_count + 1;
3850                         BUG_ON(*offset_count >= max_offset);
3851                 }
3852
3853                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3854                         new_entry = true;
3855                         continue;
3856                 }
3857
3858                 ind_offset += 2;
3859
3860                 /* look for the matching indice */
3861                 for (indices = 0;
3862                         indices < *indices_count;
3863                         indices++) {
3864                         if (unique_indices[indices] ==
3865                                 register_list_format[ind_offset])
3866                                 break;
3867                 }
3868
3869                 if (indices >= *indices_count) {
3870                         unique_indices[*indices_count] =
3871                                 register_list_format[ind_offset];
3872                         indices = *indices_count;
3873                         *indices_count = *indices_count + 1;
3874                         BUG_ON(*indices_count >= max_indices);
3875                 }
3876
3877                 register_list_format[ind_offset] = indices;
3878         }
3879 }
3880
/*
 * Upload the RLC save/restore lists: the direct register restore list
 * into ARAM, the (index-rewritten) indirect format list into GPM
 * scratch, plus the list size, the starting offsets, and the unique
 * index control registers.  Returns 0 on success, -ENOMEM if the
 * scratch copy of the format list cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* Work on a copy: parsing rewrites the index entries in place. */
	unsigned int *register_list_format =
		kmemdup(adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* list size is reported in dword pairs, hence the extra >> 1 */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices: address bits in CNTL_ADDR_n, upper bits in
	 * CNTL_DATA_n (0x3FFFF / >>20 split per RLC register layout —
	 * NOTE(review): masks come from firmware convention, not visible
	 * here; confirm against the RLC spec before changing). */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
3943
/* Turn on the RLC save/restore machine (SRM). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3948
/* Program the RLC power-gating delay/threshold registers. */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
3965
/* Toggle SMU clock slow-down on power-up (Carrizo-family PG control). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
3971
/* Toggle SMU clock slow-down on power-down (Carrizo-family PG control). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
3977
/* Enable/disable CP power gating; note the field is a DISABLE bit,
 * hence the inverted value. */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
3982
3983 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3984 {
3985         if ((adev->asic_type == CHIP_CARRIZO) ||
3986             (adev->asic_type == CHIP_STONEY)) {
3987                 gfx_v8_0_init_csb(adev);
3988                 gfx_v8_0_init_save_restore_list(adev);
3989                 gfx_v8_0_enable_save_restore_machine(adev);
3990                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3991                 gfx_v8_0_init_power_gating(adev);
3992                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3993         } else if ((adev->asic_type == CHIP_POLARIS11) ||
3994                    (adev->asic_type == CHIP_POLARIS12) ||
3995                    (adev->asic_type == CHIP_VEGAM)) {
3996                 gfx_v8_0_init_csb(adev);
3997                 gfx_v8_0_init_save_restore_list(adev);
3998                 gfx_v8_0_enable_save_restore_machine(adev);
3999                 gfx_v8_0_init_power_gating(adev);
4000         }
4001
4002 }
4003
/* Halt the RLC microengine, mask its idle interrupts and wait for the
 * serdes masters to drain. */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4011
/* Pulse the GRBM soft reset for the RLC; 50us settle time on each edge. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4020
/* Re-enable the RLC microengine.  On dGPUs the GUI idle interrupt is
 * enabled here; APUs enable it only after the CP is initialized. */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4031
/*
 * Bring the RLC back up: stop, reset, re-init power gating, start.
 * Under SR-IOV the host owns the RLC, so only the CSB is refreshed.
 * Always returns 0.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev)) {
		gfx_v8_0_init_csb(adev);
		return 0;
	}

	adev->gfx.rlc.funcs->stop(adev);
	adev->gfx.rlc.funcs->reset(adev);
	gfx_v8_0_init_pg(adev);
	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
4046
4047 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4048 {
4049         int i;
4050         u32 tmp = RREG32(mmCP_ME_CNTL);
4051
4052         if (enable) {
4053                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4054                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4055                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4056         } else {
4057                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4058                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4059                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4060                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4061                         adev->gfx.gfx_ring[i].sched.ready = false;
4062         }
4063         WREG32(mmCP_ME_CNTL, tmp);
4064         udelay(50);
4065 }
4066
4067 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4068 {
4069         u32 count = 0;
4070         const struct cs_section_def *sect = NULL;
4071         const struct cs_extent_def *ext = NULL;
4072
4073         /* begin clear state */
4074         count += 2;
4075         /* context control state */
4076         count += 3;
4077
4078         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4079                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4080                         if (sect->id == SECT_CONTEXT)
4081                                 count += 2 + ext->reg_count;
4082                         else
4083                                 return 0;
4084                 }
4085         }
4086         /* pa_sc_raster_config/pa_sc_raster_config1 */
4087         count += 4;
4088         /* end clear state */
4089         count += 2;
4090         /* clear state */
4091         count += 2;
4092
4093         return count;
4094 }
4095
/*
 * Bring up the gfx CP and emit the initial clear-state PM4 stream on
 * gfx ring 0: preamble begin, context control, all SECT_CONTEXT
 * register extents from vi_cs_data, the raster config pair, preamble
 * end, CLEAR_STATE and the CE partition bases. The stream size must
 * match gfx_v8_0_get_csb_size().
 *
 * Returns 0 on success or the amdgpu_ring_alloc() error.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 covers the SET_BASE packet for the CE partitions below */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4159 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4160 {
4161         u32 tmp;
4162         /* no gfx doorbells on iceland */
4163         if (adev->asic_type == CHIP_TOPAZ)
4164                 return;
4165
4166         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4167
4168         if (ring->use_doorbell) {
4169                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4170                                 DOORBELL_OFFSET, ring->doorbell_index);
4171                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4172                                                 DOORBELL_HIT, 0);
4173                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4174                                             DOORBELL_EN, 1);
4175         } else {
4176                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4177         }
4178
4179         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4180
4181         if (adev->flags & AMD_IS_APU)
4182                 return;
4183
4184         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4185                                         DOORBELL_RANGE_LOWER,
4186                                         adev->doorbell_index.gfx_ring0);
4187         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4188
4189         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4190                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4191 }
4192
/*
 * Program and start gfx ring 0: ring buffer size/control, read/write
 * pointers, rptr/wptr writeback addresses, ring base address and the
 * doorbell, then emit the clear-state stream via
 * gfx_v8_0_cp_gfx_start() and mark the ring schedulable.
 *
 * The register write order (CNTL with RPTR_WR_ENA, pointers, writeback
 * addresses, then final CNTL) follows the hardware bring-up sequence
 * and should not be rearranged.
 *
 * Returns 0 (gfx_v8_0_cp_gfx_start() errors are not propagated here).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* let the pointer writes settle before dropping RPTR_WR_ENA */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base address, 256-byte aligned */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->sched.ready = true;

	return 0;
}
4246
4247 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4248 {
4249         int i;
4250
4251         if (enable) {
4252                 WREG32(mmCP_MEC_CNTL, 0);
4253         } else {
4254                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4255                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4256                         adev->gfx.compute_ring[i].sched.ready = false;
4257                 adev->gfx.kiq.ring.sched.ready = false;
4258         }
4259         udelay(50);
4260 }
4261
4262 /* KIQ functions */
/* KIQ functions */
/*
 * Tell the RLC which queue is the KIQ (kernel interface queue).
 *
 * The low byte of RLC_CP_SCHEDULERS encodes the KIQ's me/pipe/queue.
 * The register is deliberately written twice: first with the new queue
 * id, then again with bit 7 ORed in — presumably the activation bit;
 * do not collapse the two writes.
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4276
/*
 * Map all compute queues (KCQs) through the KIQ.
 *
 * Builds a queue mask from the MEC queue bitmap, submits one
 * SET_RESOURCES packet describing the available queues, then one
 * MAP_QUEUES packet per compute ring pointing the hardware at that
 * ring's MQD and wptr-poll writeback address.
 *
 * Returns 0 on success, or the amdgpu_ring_alloc() error.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	/* room for SET_RESOURCES plus one MAP_QUEUES per compute ring */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}

	amdgpu_ring_commit(kiq_ring);

	return 0;
}
4337
4338 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4339 {
4340         int i, r = 0;
4341
4342         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4343                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4344                 for (i = 0; i < adev->usec_timeout; i++) {
4345                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4346                                 break;
4347                         udelay(1);
4348                 }
4349                 if (i == adev->usec_timeout)
4350                         r = -ETIMEDOUT;
4351         }
4352         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4353         WREG32(mmCP_HQD_PQ_RPTR, 0);
4354         WREG32(mmCP_HQD_PQ_WPTR, 0);
4355
4356         return r;
4357 }
4358
4359 static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4360 {
4361         struct amdgpu_device *adev = ring->adev;
4362
4363         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4364                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
4365                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4366                         ring->has_high_prio = true;
4367                         mqd->cp_hqd_queue_priority =
4368                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4369                 } else {
4370                         ring->has_high_prio = false;
4371                 }
4372         }
4373 }
4374
/*
 * Fill in the MQD (memory queue descriptor) for a compute or KIQ ring.
 *
 * Populates the header, static thread-management masks, EOP buffer
 * address/size, MQD and HQD base addresses, queue control, rptr/wptr
 * writeback addresses, doorbell control, MTYPE fields, and snapshots of
 * the current HQD defaults. The CP reads this structure when the queue
 * is mapped (via MAP_QUEUES or direct HQD commit).
 *
 * Only the KIQ sets cp_hqd_active here; mapped compute queues are
 * activated by the MAP_QUEUES packet instead (see comment below).
 *
 * Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* point the CP at the dynamic CU mask stored alongside the MQD */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: snapshot the current HQD register values */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* set static priority for a queue/ring */
	gfx_v8_0_mqd_set_priority(ring, mqd);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);

	/* map_queues packet doesn't need activate the queue,
	 * so only kiq need set this field.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		mqd->cp_hqd_active = 1;

	return 0;
}
4521
/*
 * Write an in-memory MQD out to the HQD registers of the currently
 * SRBM-selected queue (callers must hold srbm_mutex and have selected
 * the target me/pipe/queue).
 *
 * Register ranges are programmed in a deliberate order, with
 * CP_HQD_ACTIVE written last so the queue only activates once fully
 * programmed. The EOP RPTR/WPTR range is skipped on Tonga per the
 * errata noted below.
 *
 * Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4558
/*
 * Initialize the KIQ's MQD and commit it to the hardware.
 *
 * On GPU reset the saved MQD backup is restored and re-committed with
 * the ring buffer cleared; otherwise a fresh MQD is built via
 * gfx_v8_0_mqd_init(), committed, and saved as the backup for future
 * resets. The HQD programming is done under srbm_mutex with the KIQ's
 * me/pipe/queue selected.
 *
 * Always returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4597
4598 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4599 {
4600         struct amdgpu_device *adev = ring->adev;
4601         struct vi_mqd *mqd = ring->mqd_ptr;
4602         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4603
4604         if (!adev->in_gpu_reset && !adev->in_suspend) {
4605                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4606                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4607                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4608                 mutex_lock(&adev->srbm_mutex);
4609                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4610                 gfx_v8_0_mqd_init(ring);
4611                 vi_srbm_select(adev, 0, 0, 0, 0);
4612                 mutex_unlock(&adev->srbm_mutex);
4613
4614                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4615                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4616         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4617                 /* reset MQD to a clean status */
4618                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4619                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4620                 /* reset ring buffer */
4621                 ring->wptr = 0;
4622                 amdgpu_ring_clear_ring(ring);
4623         } else {
4624                 amdgpu_ring_clear_ring(ring);
4625         }
4626         return 0;
4627 }
4628
4629 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4630 {
4631         if (adev->asic_type > CHIP_TONGA) {
4632                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4633                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4634         }
4635         /* enable doorbells */
4636         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4637 }
4638
4639 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4640 {
4641         struct amdgpu_ring *ring;
4642         int r;
4643
4644         ring = &adev->gfx.kiq.ring;
4645
4646         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4647         if (unlikely(r != 0))
4648                 return r;
4649
4650         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4651         if (unlikely(r != 0))
4652                 return r;
4653
4654         gfx_v8_0_kiq_init_queue(ring);
4655         amdgpu_bo_kunmap(ring->mqd_obj);
4656         ring->mqd_ptr = NULL;
4657         amdgpu_bo_unreserve(ring->mqd_obj);
4658         ring->sched.ready = true;
4659         return 0;
4660 }
4661
4662 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4663 {
4664         struct amdgpu_ring *ring = NULL;
4665         int r = 0, i;
4666
4667         gfx_v8_0_cp_compute_enable(adev, true);
4668
4669         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4670                 ring = &adev->gfx.compute_ring[i];
4671
4672                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4673                 if (unlikely(r != 0))
4674                         goto done;
4675                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4676                 if (!r) {
4677                         r = gfx_v8_0_kcq_init_queue(ring);
4678                         amdgpu_bo_kunmap(ring->mqd_obj);
4679                         ring->mqd_ptr = NULL;
4680                 }
4681                 amdgpu_bo_unreserve(ring->mqd_obj);
4682                 if (r)
4683                         goto done;
4684         }
4685
4686         gfx_v8_0_set_mec_doorbell_range(adev);
4687
4688         r = gfx_v8_0_kiq_kcq_enable(adev);
4689         if (r)
4690                 goto done;
4691
4692 done:
4693         return r;
4694 }
4695
4696 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4697 {
4698         int r, i;
4699         struct amdgpu_ring *ring;
4700
4701         /* collect all the ring_tests here, gfx, kiq, compute */
4702         ring = &adev->gfx.gfx_ring[0];
4703         r = amdgpu_ring_test_helper(ring);
4704         if (r)
4705                 return r;
4706
4707         ring = &adev->gfx.kiq.ring;
4708         r = amdgpu_ring_test_helper(ring);
4709         if (r)
4710                 return r;
4711
4712         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4713                 ring = &adev->gfx.compute_ring[i];
4714                 amdgpu_ring_test_helper(ring);
4715         }
4716
4717         return 0;
4718 }
4719
4720 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4721 {
4722         int r;
4723
4724         if (!(adev->flags & AMD_IS_APU))
4725                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4726
4727         r = gfx_v8_0_kiq_resume(adev);
4728         if (r)
4729                 return r;
4730
4731         r = gfx_v8_0_cp_gfx_resume(adev);
4732         if (r)
4733                 return r;
4734
4735         r = gfx_v8_0_kcq_resume(adev);
4736         if (r)
4737                 return r;
4738
4739         r = gfx_v8_0_cp_test_all_rings(adev);
4740         if (r)
4741                 return r;
4742
4743         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4744
4745         return 0;
4746 }
4747
/*
 * Enable/disable both command-processor halves: the gfx ME/PFP/CE
 * micro engines and the compute MEC engines.
 */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4753
4754 static int gfx_v8_0_hw_init(void *handle)
4755 {
4756         int r;
4757         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4758
4759         gfx_v8_0_init_golden_registers(adev);
4760         gfx_v8_0_constants_init(adev);
4761
4762         r = adev->gfx.rlc.funcs->resume(adev);
4763         if (r)
4764                 return r;
4765
4766         r = gfx_v8_0_cp_resume(adev);
4767
4768         return r;
4769 }
4770
4771 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4772 {
4773         int r, i;
4774         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4775
4776         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4777         if (r)
4778                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4779
4780         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4781                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4782
4783                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4784                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4785                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4786                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4787                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4788                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4789                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4790                 amdgpu_ring_write(kiq_ring, 0);
4791                 amdgpu_ring_write(kiq_ring, 0);
4792                 amdgpu_ring_write(kiq_ring, 0);
4793         }
4794         r = amdgpu_ring_test_helper(kiq_ring);
4795         if (r)
4796                 DRM_ERROR("KCQ disable failed\n");
4797
4798         return r;
4799 }
4800
4801 static bool gfx_v8_0_is_idle(void *handle)
4802 {
4803         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4804
4805         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4806                 || RREG32(mmGRBM_STATUS2) != 0x8)
4807                 return false;
4808         else
4809                 return true;
4810 }
4811
4812 static bool gfx_v8_0_rlc_is_idle(void *handle)
4813 {
4814         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4815
4816         if (RREG32(mmGRBM_STATUS2) != 0x8)
4817                 return false;
4818         else
4819                 return true;
4820 }
4821
4822 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4823 {
4824         unsigned int i;
4825         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4826
4827         for (i = 0; i < adev->usec_timeout; i++) {
4828                 if (gfx_v8_0_rlc_is_idle(handle))
4829                         return 0;
4830
4831                 udelay(1);
4832         }
4833         return -ETIMEDOUT;
4834 }
4835
4836 static int gfx_v8_0_wait_for_idle(void *handle)
4837 {
4838         unsigned int i;
4839         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4840
4841         for (i = 0; i < adev->usec_timeout; i++) {
4842                 if (gfx_v8_0_is_idle(handle))
4843                         return 0;
4844
4845                 udelay(1);
4846         }
4847         return -ETIMEDOUT;
4848 }
4849
/*
 * gfx_v8_0_hw_fini - tear down the GFX block
 * @handle: amdgpu_device pointer
 *
 * Releases the GFX interrupt sources, unmaps the compute queues, and,
 * unless running as an SR-IOV guest, halts the CP and RLC (only once
 * each reports idle) under RLC safe mode.  Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

	/* disable KCQ to avoid CPC touch memory not valid anymore */
	gfx_v8_0_kcq_disable(adev);

	/* SR-IOV guests must not touch the CP/RLC enables; the host owns them */
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	amdgpu_gfx_rlc_enter_safe_mode(adev);
	/* only halt engines that have actually gone idle; otherwise leave
	 * them running and just report */
	if (!gfx_v8_0_wait_for_idle(adev))
		gfx_v8_0_cp_enable(adev, false);
	else
		pr_err("cp is busy, skip halt cp\n");
	if (!gfx_v8_0_wait_for_rlc_idle(adev))
		adev->gfx.rlc.funcs->stop(adev);
	else
		pr_err("rlc is busy, skip halt rlc\n");
	amdgpu_gfx_rlc_exit_safe_mode(adev);

	return 0;
}
4881
/* Suspend is a plain hw_fini; state is reprogrammed from scratch on resume. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4886
/* Resume re-runs the full hw_init sequence. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4891
/*
 * gfx_v8_0_check_soft_reset - decide whether a GFX soft reset is needed
 * @handle: amdgpu_device pointer
 *
 * Inspects the GRBM_STATUS/GRBM_STATUS2/SRBM_STATUS busy bits and caches
 * the resulting GRBM/SRBM soft-reset masks in adev->gfx for the
 * pre/soft/post reset callbacks.  Returns true when any reset bit was set.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		/* any busy gfx pipeline stage -> reset CP + GFX and the GRBM */
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any busy CP microengine (fetcher/compute/gfx) -> reset all three */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* stash the masks for the pre/soft/post reset callbacks */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
4953
/*
 * gfx_v8_0_pre_soft_reset - quiesce the GFX block before a soft reset
 * @handle: amdgpu_device pointer
 *
 * Based on the masks cached by gfx_v8_0_check_soft_reset(): stops the
 * RLC, disables the graphics CP when it is being reset, and deactivates
 * every compute HQD before disabling the MEC.  Returns 0.
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0;

	/* nothing to do when check_soft_reset() found no busy engines */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;

	/* stop the rlc */
	adev->gfx.rlc.funcs->stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* deactivate each compute queue's HQD under the SRBM select */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

       return 0;
}
4994
/*
 * gfx_v8_0_soft_reset - pulse the GRBM/SRBM soft-reset bits
 * @handle: amdgpu_device pointer
 *
 * Applies the reset masks cached by gfx_v8_0_check_soft_reset().  The GFX
 * side is stalled/cleared via GMCON_DEBUG around the pulse, and each
 * reset bit is held for 50us before release.  Returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall and clear the GFX pipe while resets are applied */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		/* read back after each write, presumably to post it -
		 * NOTE(review): confirm against the register programming docs */
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GFX stall/clear */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5056
/*
 * gfx_v8_0_post_soft_reset - bring the GFX block back after a soft reset
 * @handle: amdgpu_device pointer
 *
 * Deactivates any stale compute HQDs and resumes the KIQ/KCQ when the CP
 * was reset, resumes the graphics CP when it was reset, retests all
 * rings, and restarts the RLC.  Returns 0.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* deactivate each compute queue's HQD under the SRBM select */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
		gfx_v8_0_kcq_resume(adev);
	}

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	gfx_v8_0_cp_test_all_rings(adev);

	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
5097
5098 /**
5099  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5100  *
5101  * @adev: amdgpu_device pointer
5102  *
5103  * Fetches a GPU clock counter snapshot.
5104  * Returns the 64 bit clock counter snapshot.
5105  */
5106 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5107 {
5108         uint64_t clock;
5109
5110         mutex_lock(&adev->gfx.gpu_clock_mutex);
5111         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5112         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5113                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5114         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5115         return clock;
5116 }
5117
5118 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5119                                           uint32_t vmid,
5120                                           uint32_t gds_base, uint32_t gds_size,
5121                                           uint32_t gws_base, uint32_t gws_size,
5122                                           uint32_t oa_base, uint32_t oa_size)
5123 {
5124         /* GDS Base */
5125         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5126         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5127                                 WRITE_DATA_DST_SEL(0)));
5128         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5129         amdgpu_ring_write(ring, 0);
5130         amdgpu_ring_write(ring, gds_base);
5131
5132         /* GDS Size */
5133         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5134         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5135                                 WRITE_DATA_DST_SEL(0)));
5136         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5137         amdgpu_ring_write(ring, 0);
5138         amdgpu_ring_write(ring, gds_size);
5139
5140         /* GWS */
5141         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5142         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5143                                 WRITE_DATA_DST_SEL(0)));
5144         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5145         amdgpu_ring_write(ring, 0);
5146         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5147
5148         /* OA */
5149         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5150         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5151                                 WRITE_DATA_DST_SEL(0)));
5152         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5153         amdgpu_ring_write(ring, 0);
5154         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5155 }
5156
5157 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5158 {
5159         WREG32(mmSQ_IND_INDEX,
5160                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5161                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5162                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5163                 (SQ_IND_INDEX__FORCE_READ_MASK));
5164         return RREG32(mmSQ_IND_DATA);
5165 }
5166
5167 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5168                            uint32_t wave, uint32_t thread,
5169                            uint32_t regno, uint32_t num, uint32_t *out)
5170 {
5171         WREG32(mmSQ_IND_INDEX,
5172                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5173                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5174                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5175                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5176                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5177                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5178         while (num--)
5179                 *(out++) = RREG32(mmSQ_IND_DATA);
5180 }
5181
5182 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5183 {
5184         /* type 0 wave data */
5185         dst[(*no_fields)++] = 0;
5186         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5187         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5188         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5189         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5190         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5191         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5192         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5193         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5194         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5195         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5196         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5197         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5198         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5199         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5200         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5201         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5202         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5203         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5204 }
5205
5206 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5207                                      uint32_t wave, uint32_t start,
5208                                      uint32_t size, uint32_t *dst)
5209 {
5210         wave_read_regs(
5211                 adev, simd, wave, 0,
5212                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5213 }
5214
5215
/* GFX IP helper callbacks installed into adev->gfx.funcs at early init. */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5223
/* Early init: record the ring counts and install the gfx/ring/irq/gds/rlc
 * callback tables before any hardware access.  Always returns 0. */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5238
5239 static int gfx_v8_0_late_init(void *handle)
5240 {
5241         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5242         int r;
5243
5244         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5245         if (r)
5246                 return r;
5247
5248         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5249         if (r)
5250                 return r;
5251
5252         /* requires IBs so do in late init after IB pool is initialized */
5253         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5254         if (r)
5255                 return r;
5256
5257         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5258         if (r) {
5259                 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5260                 return r;
5261         }
5262
5263         r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5264         if (r) {
5265                 DRM_ERROR(
5266                         "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5267                         r);
5268                 return r;
5269         }
5270
5271         return 0;
5272 }
5273
5274 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5275                                                        bool enable)
5276 {
5277         if (((adev->asic_type == CHIP_POLARIS11) ||
5278             (adev->asic_type == CHIP_POLARIS12) ||
5279             (adev->asic_type == CHIP_VEGAM)) &&
5280             adev->powerplay.pp_funcs->set_powergating_by_smu)
5281                 /* Send msg to SMU via Powerplay */
5282                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5283
5284         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5285 }
5286
5287 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5288                                                         bool enable)
5289 {
5290         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5291 }
5292
5293 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5294                 bool enable)
5295 {
5296         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5297 }
5298
5299 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5300                                           bool enable)
5301 {
5302         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5303 }
5304
5305 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5306                                                 bool enable)
5307 {
5308         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5309
5310         /* Read any GFX register to wake up GFX. */
5311         if (!enable)
5312                 RREG32(mmDB_RENDER_CONTROL);
5313 }
5314
5315 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5316                                           bool enable)
5317 {
5318         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5319                 cz_enable_gfx_cg_power_gating(adev, true);
5320                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5321                         cz_enable_gfx_pipeline_power_gating(adev, true);
5322         } else {
5323                 cz_enable_gfx_cg_power_gating(adev, false);
5324                 cz_enable_gfx_pipeline_power_gating(adev, false);
5325         }
5326 }
5327
/*
 * gfx_v8_0_set_powergating_state - enable/disable GFX powergating
 * @handle: amdgpu_device pointer
 * @state: AMD_PG_STATE_GATE to gate, anything else to ungate
 *
 * Programs the per-ASIC powergating controls.  RLC safe mode brackets
 * the register writes when any of SMG/DMG/CP/RLC_SMU_HS powergating is
 * supported.  No-op for SR-IOV guests.  Always returns 0.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_enter_safe_mode(adev);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* clock slow-down on power transitions follows RLC_SMU_HS */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_exit_safe_mode(adev);
	return 0;
}
5398
/*
 * gfx_v8_0_get_clockgating_state - report active GFX clockgating features
 * @handle: amdgpu_device pointer
 * @flags: AMD_CG_SUPPORT_* bits are OR'ed into this
 *
 * Derives the active clockgating features from live register state.
 * Under SR-IOV *flags is cleared first but the registers are still read
 * afterwards.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGLG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5440
/*
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM serdes command
 * @adev: amdgpu_device pointer
 * @reg_addr: BPM register address to target
 * @cmd: command value for the BPM_DATA field
 *
 * Selects all SEs/SHs and all CU/non-CU masters, then writes the command
 * into RLC_SERDES_WR_CTRL.  Stoney's mask list omits the BPM_DATA and
 * REG_ADDR fields when clearing; all other ASICs clear those too.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to every SE/SH */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5481
/* RLC safe-mode message IDs and the RLC_GPR_REG2 request/message field
 * layout used by the safe-mode handshake. */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5488
5489 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5490 {
5491         uint32_t rlc_setting;
5492
5493         rlc_setting = RREG32(mmRLC_CNTL);
5494         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5495                 return false;
5496
5497         return true;
5498 }
5499
/*
 * gfx_v8_0_set_safe_mode - request RLC safe mode
 * @adev: amdgpu_device pointer
 *
 * Writes the enter-safe-mode command (CMD + MESSAGE=1) to RLC_SAFE_MODE,
 * then waits for GFX clocks/power to report on in RLC_GPM_STAT and for
 * the CMD bit to clear.  Both waits time out silently.
 */
static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;
	/* NOTE(review): the command word is seeded from mmRLC_CNTL but
	 * written to mmRLC_SAFE_MODE - matches upstream, but worth
	 * confirming against the RLC programming docs. */
	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
	WREG32(mmRLC_SAFE_MODE, data);

	/* wait for RLC_SAFE_MODE */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPM_STAT) &
		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
			break;
		udelay(1);
	}
	/* wait for the RLC to acknowledge the command (CMD bit clears) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5526
/*
 * gfx_v8_0_unset_safe_mode - take the RLC back out of safe mode.
 * @adev: amdgpu device pointer
 *
 * Writes CMD=1 with MESSAGE=0 to mmRLC_SAFE_MODE and waits (bounded by
 * adev->usec_timeout) for the RLC to acknowledge by clearing the CMD bit.
 */
static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK; /* MESSAGE=0, cf. MSG_EXIT_RLC_SAFE_MODE */
	WREG32(mmRLC_SAFE_MODE, data);

	/* wait for the RLC to ack the request by clearing CMD */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5543
/*
 * gfx_v8_0_update_spm_vmid - select the VMID that SPM profiling follows.
 * @adev: amdgpu device pointer
 * @vmid: VMID to program into the RLC_SPM_VMID field
 *
 * Read-modify-write of mmRLC_SPM_VMID: clears the RLC_SPM_VMID field and
 * inserts the masked vmid.
 */
static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
{
	u32 data;

	data = RREG32(mmRLC_SPM_VMID);

	data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
	/* NOTE(review): vmid is masked before the shift; this is only
	 * equivalent to masking afterwards if the field's shift is 0 —
	 * confirm against the RLC_SPM_VMID register layout.
	 */
	data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;

	WREG32(mmRLC_SPM_VMID, data);
}
5555
/* RLC callbacks handed to the common amdgpu RLC layer; all are backed by
 * the gfx_v8_0_* helpers in this file.  (Named "iceland" but used as the
 * gfx v8 table here — presumably shared across v8 parts; see callers.)
 */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
	.set_safe_mode = gfx_v8_0_set_safe_mode,
	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
	.init = gfx_v8_0_rlc_init,
	.get_csb_size = gfx_v8_0_get_csb_size,
	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
	.resume = gfx_v8_0_rlc_resume,
	.stop = gfx_v8_0_rlc_stop,
	.reset = gfx_v8_0_rlc_reset,
	.start = gfx_v8_0_rlc_start,
	.update_spm_vmid = gfx_v8_0_update_spm_vmid
};
5570
/*
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS/CGTS.
 * @adev: amdgpu device pointer
 * @enable: true to enable medium grain clock gating, false to disable
 *
 * Programs the RLC/CP memory light-sleep controls, the MGCG override
 * bits in mmRLC_CGTT_MGCG_OVERRIDE and the CGTS (tree shade) controls,
 * sending matching BPM serdes commands and waiting for the serdes
 * CU/non-CU masters to go idle between steps.  Which features are
 * touched is gated by adev->cg_flags.  The whole sequence runs inside
 * RLC safe mode; statement order follows the hardware programming
 * sequence and must not be reordered.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			/* APUs leave the GRBM override bit untouched */
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5674
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS.
 * @adev: amdgpu device pointer
 * @enable: true to enable coarse grain clock gating, false to disable
 *
 * Programs the CGCG/CGLS override bits in mmRLC_CGTT_MGCG_OVERRIDE and
 * the enable bits in mmRLC_CGCG_CGLS_CTRL, sending the matching BPM
 * serdes commands and waiting for the serdes masters to go idle between
 * steps.  GUI-idle interrupts are disabled while gating is torn down and
 * re-enabled afterwards.  Runs inside RLC safe mode; statement order
 * follows the hardware programming sequence and must not be reordered.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the cgcg override bit */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5767 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5768                                             bool enable)
5769 {
5770         if (enable) {
5771                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5772                  * ===  MGCG + MGLS + TS(CG/LS) ===
5773                  */
5774                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5775                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5776         } else {
5777                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5778                  * ===  CGCG + CGLS ===
5779                  */
5780                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5781                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5782         }
5783         return 0;
5784 }
5785
/*
 * gfx_v8_0_tonga_update_gfx_clock_gating - ask the SMU to gate GFX clocks.
 * @adev: amdgpu device pointer
 * @state: AMD_CG_STATE_GATE or AMD_CG_STATE_UNGATE
 *
 * Tonga path: builds PP_CG_MSG_ID messages for the CG (CGCG/CGLS) and MG
 * (MGCG/MGLS) blocks from adev->cg_flags and forwards each one to the SMU
 * via amdgpu_dpm_set_clockgating_by_smu().  On UNGATE the requested state
 * is forced to 0 while the support bits still describe what the ASIC can
 * do.  Always returns 0.
 */
static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
					  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		/* NOTE(review): assumes adev->powerplay.pp_funcs is non-NULL;
		 * only the callback pointer itself is checked.  Confirm all
		 * Tonga configurations populate pp_funcs. */
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
5836
/*
 * gfx_v8_0_polaris_update_gfx_clock_gating - ask the SMU to gate GFX clocks.
 * @adev: amdgpu device pointer
 * @state: AMD_CG_STATE_GATE or AMD_CG_STATE_UNGATE
 *
 * Polaris/VegaM path: like the Tonga variant but also covers the 3D
 * (3D_CGCG/3D_CGLS), RLC light-sleep and CP light-sleep blocks.  Each
 * supported block gets a PP_CG_MSG_ID message forwarded to the SMU via
 * amdgpu_dpm_set_clockgating_by_smu(); on UNGATE the requested state is
 * forced to 0.  Always returns 0.
 */
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
					  enum amd_clockgating_state state)
{

	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_3D,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_RLC,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			PP_BLOCK_GFX_CP,
			pp_support_state,
			pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
5939
5940 static int gfx_v8_0_set_clockgating_state(void *handle,
5941                                           enum amd_clockgating_state state)
5942 {
5943         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5944
5945         if (amdgpu_sriov_vf(adev))
5946                 return 0;
5947
5948         switch (adev->asic_type) {
5949         case CHIP_FIJI:
5950         case CHIP_CARRIZO:
5951         case CHIP_STONEY:
5952                 gfx_v8_0_update_gfx_clock_gating(adev,
5953                                                  state == AMD_CG_STATE_GATE);
5954                 break;
5955         case CHIP_TONGA:
5956                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5957                 break;
5958         case CHIP_POLARIS10:
5959         case CHIP_POLARIS11:
5960         case CHIP_POLARIS12:
5961         case CHIP_VEGAM:
5962                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5963                 break;
5964         default:
5965                 break;
5966         }
5967         return 0;
5968 }
5969
5970 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
5971 {
5972         return ring->adev->wb.wb[ring->rptr_offs];
5973 }
5974
5975 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5976 {
5977         struct amdgpu_device *adev = ring->adev;
5978
5979         if (ring->use_doorbell)
5980                 /* XXX check if swapping is necessary on BE */
5981                 return ring->adev->wb.wb[ring->wptr_offs];
5982         else
5983                 return RREG32(mmCP_RB0_WPTR);
5984 }
5985
5986 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5987 {
5988         struct amdgpu_device *adev = ring->adev;
5989
5990         if (ring->use_doorbell) {
5991                 /* XXX check if swapping is necessary on BE */
5992                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
5993                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
5994         } else {
5995                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5996                 (void)RREG32(mmCP_RB0_WPTR);
5997         }
5998 }
5999
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring.
 * @ring: amdgpu ring pointer
 *
 * Emits a single WAIT_REG_MEM packet that writes mmGPU_HDP_FLUSH_REQ and
 * waits for the matching bit in mmGPU_HDP_FLUSH_DONE.  Compute and KIQ
 * rings select a per-ME/pipe done bit (only ME 1 and 2 are handled; any
 * other ME emits nothing); GFX rings use the CP0 bit on the PFP engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;	/* no done bit for this ME */
		}
		reg_mem_engine = 0;	/* me */
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);	/* value written to REQ */
	amdgpu_ring_write(ring, ref_and_mask);	/* mask compared against DONE */
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6032
/*
 * gfx_v8_0_ring_emit_vgt_flush - flush the VGT.
 * @ring: amdgpu ring pointer
 *
 * Emits two EVENT_WRITE packets: a VS_PARTIAL_FLUSH followed by a
 * VGT_FLUSH.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6043
/*
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on a GFX ring.
 * @ring: amdgpu ring pointer
 * @job: job the IB belongs to; only consulted for the VMID
 *       (via AMDGPU_JOB_GET_VMID)
 * @ib: indirect buffer to execute
 * @flags: emit flags (unused here)
 *
 * Emits INDIRECT_BUFFER_CONST for constant-engine IBs, plain
 * INDIRECT_BUFFER otherwise.  Length and VMID are packed into the
 * control word.  Under SR-IOV, preemptible IBs get the PRE_ENB bit and
 * non-CE IBs with a VMID are preceded by DE metadata.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
					struct amdgpu_job *job,
					struct amdgpu_ib *ib,
					uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* swap bytes on BE */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));	/* IB must be dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6075
/*
 * gfx_v8_0_ring_emit_ib_compute - emit an indirect buffer on a compute ring.
 * @ring: amdgpu ring pointer
 * @job: job the IB belongs to; only consulted for the VMID
 *       (via AMDGPU_JOB_GET_VMID)
 * @ib: indirect buffer to execute
 * @flags: emit flags (unused here)
 *
 * Optionally resets the GDS wave ID counter first (see comment below),
 * then emits an INDIRECT_BUFFER packet with length and VMID packed into
 * the control word.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_job *job,
					  struct amdgpu_ib *ib,
					  uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	/* Currently, there is a high possibility to get wave ID mismatch
	 * between ME and GDS, leading to a hw deadlock, because ME generates
	 * different wave IDs than the GDS expects. This situation happens
	 * randomly when at least 5 compute pipes use GDS ordered append.
	 * The wave IDs generated by ME are also wrong after suspend/resume.
	 * Those are probably bugs somewhere else in the kernel driver.
	 *
	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
	 * GDS to 0 for this ring (me/pipe).
	 */
	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |	/* swap bytes on BE */
#endif
				(ib->gpu_addr & 0xFFFFFFFC));	/* IB must be dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6109
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on a GFX ring.
 * @ring: amdgpu ring pointer
 * @addr: GPU address to write the fence value to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT requests an interrupt
 *
 * Emits two EVENT_WRITE_EOP packets: first a dummy one writing seq - 1
 * with no interrupt (cache-flush workaround, see comment below), then
 * the real one writing seq with the caller's data/interrupt selection.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));	/* 32-bit data, no irq */
	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
	amdgpu_ring_write(ring, upper_32_bits(seq - 1));

	/* Then send the real EOP event down the pipe:
	 * EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6146
/*
 * gfx_v8_0_ring_emit_pipeline_sync - wait for this ring's last fence.
 * @ring: amdgpu ring pointer
 *
 * Emits a WAIT_REG_MEM packet that polls the ring's fence address until
 * it equals fence_drv.sync_seq.  GFX rings wait on the PFP so later
 * prefetches see the synced state; compute rings wait on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);		/* reference value */
	amdgpu_ring_write(ring, 0xffffffff);	/* compare mask */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6163
/*
 * gfx_v8_0_ring_emit_vm_flush - emit a VM TLB flush on the ring.
 * @ring: amdgpu ring pointer
 * @vmid: VMID whose TLB is invalidated
 * @pd_addr: page directory base address for the VMID
 *
 * Delegates the actual flush to amdgpu_gmc_emit_flush_gpu_tlb(), then
 * waits for mmVM_INVALIDATE_REQUEST to read back as 0 (mask 0 / ref 0:
 * an unconditional read used as an ordering point).  On GFX rings a
 * PFP_SYNC_ME follows so the PFP does not run ahead with stale
 * translations.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6189
6190 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6191 {
6192         return ring->adev->wb.wb[ring->wptr_offs];
6193 }
6194
6195 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6196 {
6197         struct amdgpu_device *adev = ring->adev;
6198
6199         /* XXX check if swapping is necessary on BE */
6200         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6201         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6202 }
6203
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring.
 * @ring: amdgpu ring pointer
 * @addr: GPU address to write the fence value to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT requests an interrupt
 *
 * Emits a single RELEASE_MEM packet that flushes the TC/TCL1 caches,
 * writes seq to addr and optionally raises an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);	/* address must be dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6224
/*
 * gfx_v8_0_ring_emit_fence_kiq - emit a fence on the KIQ ring
 * @ring: KIQ ring
 * @addr: GPU address of the 32-bit fence slot
 * @seq: fence sequence number (only the low 32 bits are written)
 * @flags: AMDGPU_FENCE_FLAG_INT triggers a CPC interrupt; 64-bit fences
 *         are not supported on KIQ
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6249
/* Emit a SWITCH_BUFFER packet (flips the CE/DE shadow buffers). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6255
/*
 * gfx_v8_ring_emit_cntxcntl - emit a CONTEXT_CONTROL packet
 * @ring: GFX ring
 * @flags: AMDGPU_HAVE_CTX_SWITCH / AMDGPU_PREAMBLE_IB_PRESENT* bits
 *
 * Builds the CONTEXT_CONTROL load-control dword (dw2) from the submit
 * flags; the individual bits select which state groups the CP reloads.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	/* under SR-IOV the CE metadata must be re-emitted first */
	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6288
/*
 * gfx_v8_0_ring_emit_init_cond_exec - emit a COND_EXEC with a placeholder
 *
 * Emits a COND_EXEC packet whose skip-count dword is filled with a dummy
 * value (0x55aa55aa).  Returns the ring offset of that dword so
 * gfx_v8_0_ring_emit_patch_cond_exec() can patch in the real count later.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6301
/*
 * gfx_v8_0_ring_emit_patch_cond_exec - patch a previously emitted COND_EXEC
 * @ring: ring the COND_EXEC was emitted on
 * @offset: value returned by gfx_v8_0_ring_emit_init_cond_exec()
 *
 * Replaces the 0x55aa55aa placeholder with the number of dwords between
 * the placeholder and the current write pointer, accounting for ring
 * buffer wrap-around in the else branch.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	/* index of the last dword written, masked to the ring size */
	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr wrapped past the end of the ring since init_cond_exec */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6315
/*
 * gfx_v8_0_ring_emit_rreg - emit a register read via COPY_DATA
 * @ring: KIQ ring
 * @reg: register offset to read
 *
 * Copies the register value into the KIQ's writeback slot
 * (wb.gpu_addr + reg_val_offs*4), where the CPU-side caller picks it up.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				kiq->reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				kiq->reg_val_offs * 4));
}
6332
6333 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6334                                   uint32_t val)
6335 {
6336         uint32_t cmd;
6337
6338         switch (ring->funcs->type) {
6339         case AMDGPU_RING_TYPE_GFX:
6340                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6341                 break;
6342         case AMDGPU_RING_TYPE_KIQ:
6343                 cmd = 1 << 16; /* no inc addr */
6344                 break;
6345         default:
6346                 cmd = WR_CONFIRM;
6347                 break;
6348         }
6349
6350         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6351         amdgpu_ring_write(ring, cmd);
6352         amdgpu_ring_write(ring, reg);
6353         amdgpu_ring_write(ring, 0);
6354         amdgpu_ring_write(ring, val);
6355 }
6356
6357 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6358 {
6359         struct amdgpu_device *adev = ring->adev;
6360         uint32_t value = 0;
6361
6362         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6363         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6364         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6365         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6366         WREG32(mmSQ_CMD, value);
6367 }
6368
/* Enable/disable the GFX ring end-of-pipe (timestamp) interrupt. */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6375
6376 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6377                                                      int me, int pipe,
6378                                                      enum amdgpu_interrupt_state state)
6379 {
6380         u32 mec_int_cntl, mec_int_cntl_reg;
6381
6382         /*
6383          * amdgpu controls only the first MEC. That's why this function only
6384          * handles the setting of interrupts for this specific MEC. All other
6385          * pipes' interrupts are set by amdkfd.
6386          */
6387
6388         if (me == 1) {
6389                 switch (pipe) {
6390                 case 0:
6391                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6392                         break;
6393                 case 1:
6394                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6395                         break;
6396                 case 2:
6397                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6398                         break;
6399                 case 3:
6400                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6401                         break;
6402                 default:
6403                         DRM_DEBUG("invalid pipe %d\n", pipe);
6404                         return;
6405                 }
6406         } else {
6407                 DRM_DEBUG("invalid me %d\n", me);
6408                 return;
6409         }
6410
6411         switch (state) {
6412         case AMDGPU_IRQ_STATE_DISABLE:
6413                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6414                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6415                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6416                 break;
6417         case AMDGPU_IRQ_STATE_ENABLE:
6418                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6419                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6420                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6421                 break;
6422         default:
6423                 break;
6424         }
6425 }
6426
/* Enable/disable the privileged-register-access fault interrupt. */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6437
/* Enable/disable the privileged-instruction fault interrupt. */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6448
6449 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6450                                             struct amdgpu_irq_src *src,
6451                                             unsigned type,
6452                                             enum amdgpu_interrupt_state state)
6453 {
6454         switch (type) {
6455         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6456                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6457                 break;
6458         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6459                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6460                 break;
6461         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6462                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6463                 break;
6464         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6465                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6466                 break;
6467         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6468                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6469                 break;
6470         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6471                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6472                 break;
6473         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6474                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6475                 break;
6476         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6477                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6478                 break;
6479         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6480                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6481                 break;
6482         default:
6483                 break;
6484         }
6485         return 0;
6486 }
6487
6488 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6489                                          struct amdgpu_irq_src *source,
6490                                          unsigned int type,
6491                                          enum amdgpu_interrupt_state state)
6492 {
6493         int enable_flag;
6494
6495         switch (state) {
6496         case AMDGPU_IRQ_STATE_DISABLE:
6497                 enable_flag = 0;
6498                 break;
6499
6500         case AMDGPU_IRQ_STATE_ENABLE:
6501                 enable_flag = 1;
6502                 break;
6503
6504         default:
6505                 return -EINVAL;
6506         }
6507
6508         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6509         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6510         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6511         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6512         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6513         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6514                      enable_flag);
6515         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6516                      enable_flag);
6517         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6518                      enable_flag);
6519         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6520                      enable_flag);
6521         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6522                      enable_flag);
6523         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6524                      enable_flag);
6525         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6526                      enable_flag);
6527         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6528                      enable_flag);
6529
6530         return 0;
6531 }
6532
/*
 * gfx_v8_0_set_sq_int_state - toggle SQ interrupt message delivery
 *
 * Note the inverted mapping: the STALL field is written with 1 on
 * DISABLE and 0 on ENABLE — presumably STALL=1 holds back SQ interrupt
 * messages, so stalling them is how the interrupt is "disabled"
 * (confirm against the SQ_INTERRUPT_MSG_CTRL register spec).
 */
static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     unsigned int type,
				     enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 1;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 0;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
		     enable_flag);

	return 0;
}
6558
6559 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6560                             struct amdgpu_irq_src *source,
6561                             struct amdgpu_iv_entry *entry)
6562 {
6563         int i;
6564         u8 me_id, pipe_id, queue_id;
6565         struct amdgpu_ring *ring;
6566
6567         DRM_DEBUG("IH: CP EOP\n");
6568         me_id = (entry->ring_id & 0x0c) >> 2;
6569         pipe_id = (entry->ring_id & 0x03) >> 0;
6570         queue_id = (entry->ring_id & 0x70) >> 4;
6571
6572         switch (me_id) {
6573         case 0:
6574                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6575                 break;
6576         case 1:
6577         case 2:
6578                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6579                         ring = &adev->gfx.compute_ring[i];
6580                         /* Per-queue interrupt is supported for MEC starting from VI.
6581                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6582                           */
6583                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6584                                 amdgpu_fence_process(ring);
6585                 }
6586                 break;
6587         }
6588         return 0;
6589 }
6590
6591 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6592                            struct amdgpu_iv_entry *entry)
6593 {
6594         u8 me_id, pipe_id, queue_id;
6595         struct amdgpu_ring *ring;
6596         int i;
6597
6598         me_id = (entry->ring_id & 0x0c) >> 2;
6599         pipe_id = (entry->ring_id & 0x03) >> 0;
6600         queue_id = (entry->ring_id & 0x70) >> 4;
6601
6602         switch (me_id) {
6603         case 0:
6604                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6605                 break;
6606         case 1:
6607         case 2:
6608                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6609                         ring = &adev->gfx.compute_ring[i];
6610                         if (ring->me == me_id && ring->pipe == pipe_id &&
6611                             ring->queue == queue_id)
6612                                 drm_sched_fault(&ring->sched);
6613                 }
6614                 break;
6615         }
6616 }
6617
/* IRQ handler: privileged register access fault in a command stream. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}
6626
/* IRQ handler: privileged/illegal instruction in a command stream. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}
6635
/*
 * gfx_v8_0_cp_ecc_error_irq - IRQ handler for CP EDC/ECC errors
 *
 * The event is only logged; no recovery is attempted here.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* terminate with '\n' per printk convention so the message is not
	 * merged with the next log line (the original string lacked one)
	 */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6643
6644 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6645 {
6646         u32 enc, se_id, sh_id, cu_id;
6647         char type[20];
6648         int sq_edc_source = -1;
6649
6650         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6651         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6652
6653         switch (enc) {
6654                 case 0:
6655                         DRM_INFO("SQ general purpose intr detected:"
6656                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6657                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6658                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6659                                         "wlt %d, thread_trace %d.\n",
6660                                         se_id,
6661                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6662                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6663                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6664                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6665                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6666                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6667                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6668                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6669                                         );
6670                         break;
6671                 case 1:
6672                 case 2:
6673
6674                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6675                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6676
6677                         /*
6678                          * This function can be called either directly from ISR
6679                          * or from BH in which case we can access SQ_EDC_INFO
6680                          * instance
6681                          */
6682                         if (in_task()) {
6683                                 mutex_lock(&adev->grbm_idx_mutex);
6684                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6685
6686                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6687
6688                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6689                                 mutex_unlock(&adev->grbm_idx_mutex);
6690                         }
6691
6692                         if (enc == 1)
6693                                 sprintf(type, "instruction intr");
6694                         else
6695                                 sprintf(type, "EDC/ECC error");
6696
6697                         DRM_INFO(
6698                                 "SQ %s detected: "
6699                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6700                                         "trap %s, sq_ed_info.source %s.\n",
6701                                         type, se_id, sh_id, cu_id,
6702                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6703                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6704                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6705                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6706                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6707                                 );
6708                         break;
6709                 default:
6710                         DRM_ERROR("SQ invalid encoding type\n.");
6711         }
6712 }
6713
6714 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6715 {
6716
6717         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6718         struct sq_work *sq_work = container_of(work, struct sq_work, work);
6719
6720         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6721 }
6722
6723 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6724                            struct amdgpu_irq_src *source,
6725                            struct amdgpu_iv_entry *entry)
6726 {
6727         unsigned ih_data = entry->src_data[0];
6728
6729         /*
6730          * Try to submit work so SQ_EDC_INFO can be accessed from
6731          * BH. If previous work submission hasn't finished yet
6732          * just print whatever info is possible directly from the ISR.
6733          */
6734         if (work_pending(&adev->gfx.sq_work.work)) {
6735                 gfx_v8_0_parse_sq_irq(adev, ih_data);
6736         } else {
6737                 adev->gfx.sq_work.ih_data = ih_data;
6738                 schedule_work(&adev->gfx.sq_work.work);
6739         }
6740
6741         return 0;
6742 }
6743
/* IP-block lifecycle callbacks for the gfx v8 block. */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6764
/* Ring callbacks for the GFX (graphics) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		12 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /*	CE_META */
		31 + /*	DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		12 + 12 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
};
6809
/* Ring callbacks for the compute (MEC) rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6838
/* Ring callbacks for the KIQ (kernel interface queue) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6862
6863 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6864 {
6865         int i;
6866
6867         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6868
6869         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6870                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6871
6872         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6873                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6874 }
6875
/* IRQ source callbacks: CP end-of-pipe interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6880
/* IRQ source callbacks: privileged register access faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6885
/* IRQ source callbacks: privileged instruction faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6890
/* IRQ source callbacks: CP EDC/ECC errors. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};
6895
/* IRQ source callbacks: SQ (shader sequencer) interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};
6900
/* Wire up all gfx v8 interrupt sources with their callback tables. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	/* EOP has one type per CP ring; the others have a single type */
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}
6918
/*
 * Install the RLC function table.  The table is named after Iceland but
 * is assigned unconditionally here for every chip using this IP block.
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
6923
/*
 * Initialize the GDS (global data share) parameters: the GDS size and
 * max compute wave id are read back from hardware registers, while the
 * GWS and OA sizes are fixed for this generation.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
}
6932
6933 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6934                                                  u32 bitmap)
6935 {
6936         u32 data;
6937
6938         if (!bitmap)
6939                 return;
6940
6941         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6942         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6943
6944         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6945 }
6946
6947 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6948 {
6949         u32 data, mask;
6950
6951         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6952                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6953
6954         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6955
6956         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6957 }
6958
/*
 * Walk every shader engine / shader array, read back the per-SH active
 * CU bitmaps and fill adev->gfx.cu_info, including the "always on"
 * (AO) CU selection.  Takes grbm_idx_mutex around the SE/SH selection.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];	/* up to 4 SEs x 2 SHs */
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs mark at most 2 CUs per SH as always-on; dGPUs allow all */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			/* user disable masks only cover the first 4 SEs / 2 SHs */
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; first ao_cu_num become always-on */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* pack AO bits: 16 bits per SE, 8 bits per SH, 2x2 only */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* restore broadcast to all SEs/SHs */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	/* fixed per-generation limits reported to userspace/KFD */
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
7014
/* IP block descriptor for GFX v8.0. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7023
/* IP block descriptor for GFX v8.1; shares the v8.0 function table. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7032
7033 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7034 {
7035         uint64_t ce_payload_addr;
7036         int cnt_ce;
7037         union {
7038                 struct vi_ce_ib_state regular;
7039                 struct vi_ce_ib_state_chained_ib chained;
7040         } ce_payload = {};
7041
7042         if (ring->adev->virt.chained_ib_support) {
7043                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7044                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7045                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7046         } else {
7047                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7048                         offsetof(struct vi_gfx_meta_data, ce_payload);
7049                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7050         }
7051
7052         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7053         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7054                                 WRITE_DATA_DST_SEL(8) |
7055                                 WR_CONFIRM) |
7056                                 WRITE_DATA_CACHE_POLICY(0));
7057         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7058         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7059         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7060 }
7061
7062 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7063 {
7064         uint64_t de_payload_addr, gds_addr, csa_addr;
7065         int cnt_de;
7066         union {
7067                 struct vi_de_ib_state regular;
7068                 struct vi_de_ib_state_chained_ib chained;
7069         } de_payload = {};
7070
7071         csa_addr = amdgpu_csa_vaddr(ring->adev);
7072         gds_addr = csa_addr + 4096;
7073         if (ring->adev->virt.chained_ib_support) {
7074                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7075                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7076                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7077                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7078         } else {
7079                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7080                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7081                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7082                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7083         }
7084
7085         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7086         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7087                                 WRITE_DATA_DST_SEL(8) |
7088                                 WR_CONFIRM) |
7089                                 WRITE_DATA_CACHE_POLICY(0));
7090         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7091         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7092         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7093 }