Linux-libre 5.3.12-gnu
[librecmc/linux-libre.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "vi.h"
33 #include "vi_structs.h"
34 #include "vid.h"
35 #include "amdgpu_ucode.h"
36 #include "amdgpu_atombios.h"
37 #include "atombios_i2c.h"
38 #include "clearstate_vi.h"
39
40 #include "gmc/gmc_8_2_d.h"
41 #include "gmc/gmc_8_2_sh_mask.h"
42
43 #include "oss/oss_3_0_d.h"
44 #include "oss/oss_3_0_sh_mask.h"
45
46 #include "bif/bif_5_0_d.h"
47 #include "bif/bif_5_0_sh_mask.h"
48 #include "gca/gfx_8_0_d.h"
49 #include "gca/gfx_8_0_enum.h"
50 #include "gca/gfx_8_0_sh_mask.h"
51
52 #include "dce/dce_10_0_d.h"
53 #include "dce/dce_10_0_sh_mask.h"
54
55 #include "smu/smu_7_1_3_d.h"
56
57 #include "ivsrcid/ivsrcid_vislands30.h"
58
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 4096

/* Per-ASIC golden values for GB_ADDR_CONFIG. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Helpers that place a field value at its shift position in the
 * GB_TILE_MODE0 / GB_MACROTILE_MODE0 register layouts.
 */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Bit masks for RLC_CGTT_MGCG_OVERRIDE (clock-gating override selects). */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address*/
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

/*(DEBLOBBED)*/
101
102 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
103 {
104         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
105         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
106         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
107         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
108         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
109         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
110         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
111         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
112         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
113         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
114         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
115         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
116         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
117         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
118         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
119         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
120 };
121
122 static const u32 golden_settings_tonga_a11[] =
123 {
124         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
125         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
126         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
127         mmGB_GPU_ID, 0x0000000f, 0x00000000,
128         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
129         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
130         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
131         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
132         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
133         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
134         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
135         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
136         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
137         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
138         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
139         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
140 };
141
142 static const u32 tonga_golden_common_all[] =
143 {
144         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
145         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
146         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
147         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
148         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
149         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
150         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
151         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
152 };
153
154 static const u32 tonga_mgcg_cgcg_init[] =
155 {
156         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
157         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
158         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
159         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
160         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
161         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
162         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
163         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
164         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
165         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
166         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
167         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
168         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
169         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
170         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
171         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
172         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
173         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
174         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
175         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
176         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
177         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
178         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
179         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
180         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
181         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
182         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
183         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
184         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
185         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
186         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
187         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
188         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
189         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
190         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
191         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
192         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
193         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
194         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
195         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
196         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
197         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
198         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
199         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
200         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
201         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
202         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
203         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
204         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
205         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
206         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
207         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
208         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
209         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
210         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
211         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
212         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
213         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
214         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
215         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
216         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
217         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
218         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
219         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
220         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
221         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
222         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
223         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
224         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
225         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
226         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
227         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
228         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
229         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
230         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
231 };
232
233 static const u32 golden_settings_vegam_a11[] =
234 {
235         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
236         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
237         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
238         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
239         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
240         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
241         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
242         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
243         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
244         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
245         mmSQ_CONFIG, 0x07f80000, 0x01180000,
246         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
247         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
248         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
249         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
250         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
251         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
252 };
253
254 static const u32 vegam_golden_common_all[] =
255 {
256         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
257         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
258         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
259         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
260         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
261         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
262 };
263
264 static const u32 golden_settings_polaris11_a11[] =
265 {
266         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
267         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
268         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
269         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
270         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
271         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
272         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
273         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
274         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
275         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
276         mmSQ_CONFIG, 0x07f80000, 0x01180000,
277         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
278         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
279         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
280         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
281         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
282         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
283 };
284
285 static const u32 polaris11_golden_common_all[] =
286 {
287         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
288         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
289         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
290         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
291         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
292         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
293 };
294
295 static const u32 golden_settings_polaris10_a11[] =
296 {
297         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
298         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
299         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
300         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
301         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
302         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
303         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
304         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
305         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
306         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
307         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
308         mmSQ_CONFIG, 0x07f80000, 0x07180000,
309         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
310         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
311         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
312         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
313         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
314 };
315
316 static const u32 polaris10_golden_common_all[] =
317 {
318         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
319         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
320         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
321         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
322         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
323         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
324         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
325         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
326 };
327
328 static const u32 fiji_golden_common_all[] =
329 {
330         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
331         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
332         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
333         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
334         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
335         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
336         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
337         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
338         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
339         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
340 };
341
342 static const u32 golden_settings_fiji_a10[] =
343 {
344         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
345         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
346         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
347         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
348         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
349         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
350         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
351         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
352         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
353         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
354         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
355 };
356
357 static const u32 fiji_mgcg_cgcg_init[] =
358 {
359         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
360         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
361         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
362         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
363         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
364         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
365         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
366         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
367         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
368         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
369         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
370         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
371         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
372         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
373         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
374         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
375         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
376         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
377         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
378         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
379         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
380         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
381         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
382         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
383         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
384         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
385         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
386         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
387         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
388         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
389         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
390         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
391         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
392         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
393         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
394 };
395
396 static const u32 golden_settings_iceland_a11[] =
397 {
398         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
399         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
400         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
401         mmGB_GPU_ID, 0x0000000f, 0x00000000,
402         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
403         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
404         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
405         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
406         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
407         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
408         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
409         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
410         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
411         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
412         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
413         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
414 };
415
416 static const u32 iceland_golden_common_all[] =
417 {
418         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
419         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
420         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
421         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
422         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
423         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
424         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
425         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
426 };
427
428 static const u32 iceland_mgcg_cgcg_init[] =
429 {
430         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
431         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
432         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
433         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
434         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
435         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
436         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
437         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
439         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
441         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
448         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
449         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
450         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
452         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
453         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
454         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
455         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
456         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
457         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
458         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
459         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
460         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
461         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
462         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
463         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
464         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
465         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
466         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
467         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
468         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
469         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
470         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
471         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
472         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
473         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
474         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
475         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
476         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
477         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
478         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
479         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
480         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
481         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
482         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
483         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
484         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
485         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
486         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
487         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
488         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
489         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
490         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
491         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
492         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
493         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
494 };
495
496 static const u32 cz_golden_settings_a11[] =
497 {
498         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
499         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
500         mmGB_GPU_ID, 0x0000000f, 0x00000000,
501         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
502         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
503         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
504         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
505         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
506         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
507         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
508         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
509         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
510 };
511
512 static const u32 cz_golden_common_all[] =
513 {
514         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
515         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
516         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
517         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
518         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
519         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
520         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
521         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
522 };
523
524 static const u32 cz_mgcg_cgcg_init[] =
525 {
526         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
527         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
528         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
529         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
530         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
531         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
532         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
533         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
534         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
535         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
536         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
537         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
538         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
539         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
540         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
541         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
542         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
543         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
544         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
545         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
546         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
547         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
548         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
549         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
550         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
551         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
552         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
553         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
554         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
555         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
556         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
557         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
558         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
559         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
560         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
561         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
562         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
563         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
564         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
565         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
566         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
567         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
568         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
569         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
570         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
571         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
572         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
573         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
574         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
575         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
576         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
577         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
578         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
579         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
580         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
581         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
582         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
583         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
584         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
585         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
586         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
587         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
588         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
589         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
590         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
591         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
592         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
593         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
594         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
595         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
596         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
597         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
598         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
599         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
600         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
601 };
602
603 static const u32 stoney_golden_settings_a11[] =
604 {
605         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
606         mmGB_GPU_ID, 0x0000000f, 0x00000000,
607         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
608         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
609         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
610         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
611         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
612         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
613         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
614         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
615 };
616
/*
 * Stoney common golden settings ({register, mask, value} triples) applied
 * after the per-revision settings; covers GRBM broadcast index, raster
 * config and SPI CU resource reservation.
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
628
/*
 * Stoney medium-grain / coarse-grain clockgating init values
 * ({register, mask, value} triples), programmed before the golden
 * settings in gfx_v8_0_init_golden_registers().
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
637
638
/*
 * Human-readable descriptions of SQ EDC (error detection and correction)
 * error sources, indexed by the hardware source code.
 */
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
648
649 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
650 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
651 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
652 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
653 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
654 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
655 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
656 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
657
/*
 * gfx_v8_0_init_golden_registers - program the per-ASIC "golden" register
 * values.
 *
 * For each supported ASIC the clockgating-init, golden-settings and
 * common sequences are applied in that order via
 * amdgpu_device_program_register_sequence(); the ordering is part of the
 * hardware bring-up contract and must not be changed.
 *
 * Polaris10 additionally writes CG_ACLK_CNTL through the SMC indirect
 * space and, for three specific board SKUs (matched by PCI revision and
 * subsystem IDs), issues two atombios I2C transactions.
 * NOTE(review): the I2C writes are a board-specific workaround; their
 * exact purpose is not visible from this file.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* board-specific I2C workaround for three known SKUs */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
754
755 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
756 {
757         adev->gfx.scratch.num_reg = 8;
758         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
759         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
760 }
761
762 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
763 {
764         struct amdgpu_device *adev = ring->adev;
765         uint32_t scratch;
766         uint32_t tmp = 0;
767         unsigned i;
768         int r;
769
770         r = amdgpu_gfx_scratch_get(adev, &scratch);
771         if (r)
772                 return r;
773
774         WREG32(scratch, 0xCAFEDEAD);
775         r = amdgpu_ring_alloc(ring, 3);
776         if (r)
777                 goto error_free_scratch;
778
779         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
780         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
781         amdgpu_ring_write(ring, 0xDEADBEEF);
782         amdgpu_ring_commit(ring);
783
784         for (i = 0; i < adev->usec_timeout; i++) {
785                 tmp = RREG32(scratch);
786                 if (tmp == 0xDEADBEEF)
787                         break;
788                 udelay(1);
789         }
790
791         if (i >= adev->usec_timeout)
792                 r = -ETIMEDOUT;
793
794 error_free_scratch:
795         amdgpu_gfx_scratch_free(adev, scratch);
796         return r;
797 }
798
/*
 * gfx_v8_0_ring_test_ib - smoke-test the indirect-buffer path of a ring.
 *
 * Seeds a writeback slot with 0xCAFEDEAD, submits a small IB containing a
 * WRITE_DATA packet that stores 0xDEADBEEF into that slot, waits for the
 * submission fence, and checks the slot contents.
 *
 * Returns 0 on success, -ETIMEDOUT if the fence did not signal within
 * @timeout, -EINVAL if the magic value never arrived, or a negative error
 * from allocation/submission.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	/* grab a 32-bit writeback slot the GPU can write to */
	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	/* WRITE_DATA packet: have the CP store 0xDEADBEEF at gpu_addr */
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		/* fence never signalled within @timeout */
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	/* the CP must have replaced the seed with the magic word */
	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}
853
854
/*
 * gfx_v8_0_free_microcode - drop all GFX firmware references taken by
 * gfx_v8_0_init_microcode() and free the cached RLC register-list copy.
 */
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	/* Stoney/Topaz have no MEC2, so no firmware was requested for it */
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
874
875 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
876 {
877         const char *chip_name;
878         char fw_name[30];
879         int err;
880         struct amdgpu_firmware_info *info = NULL;
881         const struct common_firmware_header *header = NULL;
882         const struct gfx_firmware_header_v1_0 *cp_hdr;
883         const struct rlc_firmware_header_v2_0 *rlc_hdr;
884         unsigned int *tmp = NULL, i;
885
886         DRM_DEBUG("\n");
887
888         switch (adev->asic_type) {
889         case CHIP_TOPAZ:
890                 chip_name = "topaz";
891                 break;
892         case CHIP_TONGA:
893                 chip_name = "tonga";
894                 break;
895         case CHIP_CARRIZO:
896                 chip_name = "carrizo";
897                 break;
898         case CHIP_FIJI:
899                 chip_name = "fiji";
900                 break;
901         case CHIP_STONEY:
902                 chip_name = "stoney";
903                 break;
904         case CHIP_POLARIS10:
905                 chip_name = "polaris10";
906                 break;
907         case CHIP_POLARIS11:
908                 chip_name = "polaris11";
909                 break;
910         case CHIP_POLARIS12:
911                 chip_name = "polaris12";
912                 break;
913         case CHIP_VEGAM:
914                 chip_name = "vegam";
915                 break;
916         default:
917                 BUG();
918         }
919
920         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
921                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
922                 err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
923                 if (err == -ENOENT) {
924                         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
925                         err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
926                 }
927         } else {
928                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
929                 err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
930         }
931         if (err)
932                 goto out;
933         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
934         if (err)
935                 goto out;
936         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
937         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
938         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
939
940         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
941                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
942                 err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
943                 if (err == -ENOENT) {
944                         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
945                         err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
946                 }
947         } else {
948                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
949                 err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
950         }
951         if (err)
952                 goto out;
953         err = amdgpu_ucode_validate(adev->gfx.me_fw);
954         if (err)
955                 goto out;
956         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
957         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
958
959         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
960
961         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
962                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
963                 err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
964                 if (err == -ENOENT) {
965                         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
966                         err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
967                 }
968         } else {
969                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
970                 err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
971         }
972         if (err)
973                 goto out;
974         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
975         if (err)
976                 goto out;
977         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
978         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
979         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
980
981         /*
982          * Support for MCBP/Virtualization in combination with chained IBs is
983          * formal released on feature version #46
984          */
985         if (adev->gfx.ce_feature_version >= 46 &&
986             adev->gfx.pfp_feature_version >= 46) {
987                 adev->virt.chained_ib_support = true;
988                 DRM_INFO("Chained IB support enabled!\n");
989         } else
990                 adev->virt.chained_ib_support = false;
991
992         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
993         err = reject_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
994         if (err)
995                 goto out;
996         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
997         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
998         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
999         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1000
1001         adev->gfx.rlc.save_and_restore_offset =
1002                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1003         adev->gfx.rlc.clear_state_descriptor_offset =
1004                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1005         adev->gfx.rlc.avail_scratch_ram_locations =
1006                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1007         adev->gfx.rlc.reg_restore_list_size =
1008                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1009         adev->gfx.rlc.reg_list_format_start =
1010                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1011         adev->gfx.rlc.reg_list_format_separate_start =
1012                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1013         adev->gfx.rlc.starting_offsets_start =
1014                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1015         adev->gfx.rlc.reg_list_format_size_bytes =
1016                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1017         adev->gfx.rlc.reg_list_size_bytes =
1018                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1019
1020         adev->gfx.rlc.register_list_format =
1021                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1022                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1023
1024         if (!adev->gfx.rlc.register_list_format) {
1025                 err = -ENOMEM;
1026                 goto out;
1027         }
1028
1029         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1030                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1031         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1032                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1033
1034         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1035
1036         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1037                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1038         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1039                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1040
1041         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1042                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1043                 err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1044                 if (err == -ENOENT) {
1045                         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1046                         err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1047                 }
1048         } else {
1049                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1050                 err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1051         }
1052         if (err)
1053                 goto out;
1054         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1055         if (err)
1056                 goto out;
1057         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1058         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1059         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1060
1061         if ((adev->asic_type != CHIP_STONEY) &&
1062             (adev->asic_type != CHIP_TOPAZ)) {
1063                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1064                         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1065                         err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1066                         if (err == -ENOENT) {
1067                                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1068                                 err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1069                         }
1070                 } else {
1071                         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1072                         err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1073                 }
1074                 if (!err) {
1075                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1076                         if (err)
1077                                 goto out;
1078                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1079                                 adev->gfx.mec2_fw->data;
1080                         adev->gfx.mec2_fw_version =
1081                                 le32_to_cpu(cp_hdr->header.ucode_version);
1082                         adev->gfx.mec2_feature_version =
1083                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1084                 } else {
1085                         err = 0;
1086                         adev->gfx.mec2_fw = NULL;
1087                 }
1088         }
1089
1090         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1091         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1092         info->fw = adev->gfx.pfp_fw;
1093         header = (const struct common_firmware_header *)info->fw->data;
1094         adev->firmware.fw_size +=
1095                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1096
1097         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1098         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1099         info->fw = adev->gfx.me_fw;
1100         header = (const struct common_firmware_header *)info->fw->data;
1101         adev->firmware.fw_size +=
1102                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1103
1104         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1105         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1106         info->fw = adev->gfx.ce_fw;
1107         header = (const struct common_firmware_header *)info->fw->data;
1108         adev->firmware.fw_size +=
1109                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1110
1111         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1112         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1113         info->fw = adev->gfx.rlc_fw;
1114         header = (const struct common_firmware_header *)info->fw->data;
1115         adev->firmware.fw_size +=
1116                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1117
1118         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1119         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1120         info->fw = adev->gfx.mec_fw;
1121         header = (const struct common_firmware_header *)info->fw->data;
1122         adev->firmware.fw_size +=
1123                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1124
1125         /* we need account JT in */
1126         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1127         adev->firmware.fw_size +=
1128                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1129
1130         if (amdgpu_sriov_vf(adev)) {
1131                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1132                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1133                 info->fw = adev->gfx.mec_fw;
1134                 adev->firmware.fw_size +=
1135                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1136         }
1137
1138         if (adev->gfx.mec2_fw) {
1139                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1140                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1141                 info->fw = adev->gfx.mec2_fw;
1142                 header = (const struct common_firmware_header *)info->fw->data;
1143                 adev->firmware.fw_size +=
1144                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1145         }
1146
1147 out:
1148         if (err) {
1149                 dev_err(adev->dev,
1150                         "gfx8: Failed to load firmware \"%s\"\n",
1151                         fw_name);
1152                 release_firmware(adev->gfx.pfp_fw);
1153                 adev->gfx.pfp_fw = NULL;
1154                 release_firmware(adev->gfx.me_fw);
1155                 adev->gfx.me_fw = NULL;
1156                 release_firmware(adev->gfx.ce_fw);
1157                 adev->gfx.ce_fw = NULL;
1158                 release_firmware(adev->gfx.rlc_fw);
1159                 adev->gfx.rlc_fw = NULL;
1160                 release_firmware(adev->gfx.mec_fw);
1161                 adev->gfx.mec_fw = NULL;
1162                 release_firmware(adev->gfx.mec2_fw);
1163                 adev->gfx.mec2_fw = NULL;
1164         }
1165         return err;
1166 }
1167
/*
 * gfx_v8_0_get_csb_buffer - assemble the clear-state indirect buffer.
 *
 * Emits, in order: PREAMBLE_BEGIN_CLEAR_STATE, a CONTEXT_CONTROL packet,
 * one SET_CONTEXT_REG packet per SECT_CONTEXT extent from
 * adev->gfx.rlc.cs_data, the raster-config pair from the probed RB
 * configuration, PREAMBLE_END_CLEAR_STATE, and a final CLEAR_STATE packet.
 * Packet ordering is part of the CP contract and must not change.
 *
 * Silently returns if cs_data or @buffer is NULL. Non-SECT_CONTEXT
 * sections abort the walk early (only context registers are supported
 * here).
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1214
1215 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1216 {
1217         if (adev->asic_type == CHIP_CARRIZO)
1218                 return 5;
1219         else
1220                 return 4;
1221 }
1222
1223 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1224 {
1225         const struct cs_section_def *cs_data;
1226         int r;
1227
1228         adev->gfx.rlc.cs_data = vi_cs_data;
1229
1230         cs_data = adev->gfx.rlc.cs_data;
1231
1232         if (cs_data) {
1233                 /* init clear state block */
1234                 r = amdgpu_gfx_rlc_init_csb(adev);
1235                 if (r)
1236                         return r;
1237         }
1238
1239         if ((adev->asic_type == CHIP_CARRIZO) ||
1240             (adev->asic_type == CHIP_STONEY)) {
1241                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1242                 r = amdgpu_gfx_rlc_init_cpt(adev);
1243                 if (r)
1244                         return r;
1245         }
1246
1247         return 0;
1248 }
1249
1250 static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev)
1251 {
1252         int r;
1253
1254         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1255         if (unlikely(r != 0))
1256                 return r;
1257
1258         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1259                         AMDGPU_GEM_DOMAIN_VRAM);
1260         if (!r)
1261                 adev->gfx.rlc.clear_state_gpu_addr =
1262                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1263
1264         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1265
1266         return r;
1267 }
1268
1269 static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev)
1270 {
1271         int r;
1272
1273         if (!adev->gfx.rlc.clear_state_obj)
1274                 return;
1275
1276         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1277         if (likely(r == 0)) {
1278                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1279                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1280         }
1281 }
1282
/* Free the MEC HPD EOP buffer object created by gfx_v8_0_mec_init(). */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1287
1288 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1289 {
1290         int r;
1291         u32 *hpd;
1292         size_t mec_hpd_size;
1293
1294         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1295
1296         /* take ownership of the relevant compute queues */
1297         amdgpu_gfx_compute_queue_acquire(adev);
1298
1299         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1300
1301         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1302                                       AMDGPU_GEM_DOMAIN_VRAM,
1303                                       &adev->gfx.mec.hpd_eop_obj,
1304                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1305                                       (void **)&hpd);
1306         if (r) {
1307                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1308                 return r;
1309         }
1310
1311         memset(hpd, 0, mec_hpd_size);
1312
1313         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1314         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1315
1316         return 0;
1317 }
1318
/* Hand-assembled GCN compute shader, dispatched by
 * gfx_v8_0_do_edc_gpr_workarounds() to write known values into the
 * VGPRs before enabling EDC (raw machine-code dwords). */
static const u32 vgpr_init_compute_shader[] =
{
        0x7e000209, 0x7e020208,
        0x7e040207, 0x7e060206,
        0x7e080205, 0x7e0a0204,
        0x7e0c0203, 0x7e0e0202,
        0x7e100201, 0x7e120200,
        0x7e140209, 0x7e160208,
        0x7e180207, 0x7e1a0206,
        0x7e1c0205, 0x7e1e0204,
        0x7e200203, 0x7e220202,
        0x7e240201, 0x7e260200,
        0x7e280209, 0x7e2a0208,
        0x7e2c0207, 0x7e2e0206,
        0x7e300205, 0x7e320204,
        0x7e340203, 0x7e360202,
        0x7e380201, 0x7e3a0200,
        0x7e3c0209, 0x7e3e0208,
        0x7e400207, 0x7e420206,
        0x7e440205, 0x7e460204,
        0x7e480203, 0x7e4a0202,
        0x7e4c0201, 0x7e4e0200,
        0x7e500209, 0x7e520208,
        0x7e540207, 0x7e560206,
        0x7e580205, 0x7e5a0204,
        0x7e5c0203, 0x7e5e0202,
        0x7e600201, 0x7e620200,
        0x7e640209, 0x7e660208,
        0x7e680207, 0x7e6a0206,
        0x7e6c0205, 0x7e6e0204,
        0x7e700203, 0x7e720202,
        0x7e740201, 0x7e760200,
        0x7e780209, 0x7e7a0208,
        0x7e7c0207, 0x7e7e0206,
        0xbf8a0000, 0xbf810000,
};
1355
/* Hand-assembled GCN compute shader, dispatched (twice, with different
 * static thread management masks) by gfx_v8_0_do_edc_gpr_workarounds()
 * to write known values into the SGPRs (raw machine-code dwords). */
static const u32 sgpr_init_compute_shader[] =
{
        0xbe8a0100, 0xbe8c0102,
        0xbe8e0104, 0xbe900106,
        0xbe920108, 0xbe940100,
        0xbe960102, 0xbe980104,
        0xbe9a0106, 0xbe9c0108,
        0xbe9e0100, 0xbea00102,
        0xbea20104, 0xbea40106,
        0xbea60108, 0xbea80100,
        0xbeaa0102, 0xbeac0104,
        0xbeae0106, 0xbeb00108,
        0xbeb20100, 0xbeb40102,
        0xbeb60104, 0xbeb80106,
        0xbeba0108, 0xbebc0100,
        0xbebe0102, 0xbec00104,
        0xbec20106, 0xbec40108,
        0xbec60100, 0xbec80102,
        0xbee60004, 0xbee70005,
        0xbeea0006, 0xbeeb0007,
        0xbee80008, 0xbee90009,
        0xbefc0000, 0xbf8a0000,
        0xbf810000, 0x00000000,
};
1380
/* SH register/value pairs written with SET_SH_REG packets before the
 * VGPR-init dispatch in gfx_v8_0_do_edc_gpr_workarounds(). */
static const u32 vgpr_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
        mmCOMPUTE_NUM_THREAD_X, 256*4,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1401
/* SH register/value pairs for the first SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds() (SE0 CU mask 0x0f). */
static const u32 sgpr1_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1422
/* SH register/value pairs for the second SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds() (SE0 CU mask 0xf0 — the
 * complementary CU set to sgpr1_init_regs). */
static const u32 sgpr2_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1443
/* EDC SEC/DED error-counter registers; read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counters. */
static const u32 sec_ded_counter_registers[] =
{
        mmCPC_EDC_ATC_CNT,
        mmCPC_EDC_SCRATCH_CNT,
        mmCPC_EDC_UCODE_CNT,
        mmCPF_EDC_ATC_CNT,
        mmCPF_EDC_ROQ_CNT,
        mmCPF_EDC_TAG_CNT,
        mmCPG_EDC_ATC_CNT,
        mmCPG_EDC_DMA_CNT,
        mmCPG_EDC_TAG_CNT,
        mmDC_EDC_CSINVOC_CNT,
        mmDC_EDC_RESTORE_CNT,
        mmDC_EDC_STATE_CNT,
        mmGDS_EDC_CNT,
        mmGDS_EDC_GRBM_CNT,
        mmGDS_EDC_OA_DED,
        mmSPI_EDC_CNT,
        mmSQC_ATC_EDC_GATCL1_CNT,
        mmSQC_EDC_CNT,
        mmSQ_EDC_DED_CNT,
        mmSQ_EDC_INFO,
        mmSQ_EDC_SEC_CNT,
        mmTCC_EDC_CNT,
        mmTCP_ATC_EDC_GATCL1_CNT,
        mmTCP_EDC_CNT,
        mmTD_EDC_CNT
};
1472
/*
 * Carrizo EDC workaround: build a single indirect buffer that runs three
 * compute dispatches (one VGPR-init, two SGPR-init over complementary CU
 * sets) so all GPRs hold known values, then enable DED/propagation in
 * GB_EDC_MODE and read back the SEC/DED counters to clear them.
 *
 * Returns 0 on success (or when not applicable), negative error code on
 * IB allocation/submit/fence failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;
        int r, i;
        u32 tmp;
        unsigned total_size, vgpr_offset, sgpr_offset;
        u64 gpu_addr;

        /* only supported on CZ */
        if (adev->asic_type != CHIP_CARRIZO)
                return 0;

        /* bail if the compute ring is not ready */
        if (!ring->sched.ready)
                return 0;

        /* save GB_EDC_MODE and disable EDC while the init shaders run */
        tmp = RREG32(mmGB_EDC_MODE);
        WREG32(mmGB_EDC_MODE, 0);

        /* per pass: 3 dwords per SET_SH_REG pair, +4 for PGM_LO/HI,
         * +5 for DISPATCH_DIRECT, +2 for the EVENT_WRITE flush */
        total_size =
                (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size +=
                (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size +=
                (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        /* shader code lives in the same IB, 256-byte aligned after the packets */
        total_size = ALIGN(total_size, 256);
        vgpr_offset = total_size;
        total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
        sgpr_offset = total_size;
        total_size += sizeof(sgpr_init_compute_shader);

        /* allocate an indirect buffer to put the commands in */
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, total_size, &ib);
        if (r) {
                DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
                return r;
        }

        /* load the compute shaders */
        for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
                ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

        for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
                ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

        /* init the ib length to 0 */
        ib.length_dw = 0;

        /* VGPR */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* SGPR1 */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* SGPR2 */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* schedule the ib on the ring */
        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r) {
                DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
                goto fail;
        }

        /* wait for the GPU to finish processing the IB */
        r = dma_fence_wait(f, false);
        if (r) {
                DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
                goto fail;
        }

        /* re-enable EDC with DED and propagation on top of the saved mode */
        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
        WREG32(mmGB_EDC_MODE, tmp);

        tmp = RREG32(mmCC_GC_EDC_CONFIG);
        /* NOTE(review): clears DIS_EDC then ORs in bit 0 — presumably
         * re-asserting the lowest config bit; confirm against register spec */
        tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
        WREG32(mmCC_GC_EDC_CONFIG, tmp);


        /* read back registers to clear the counters */
        for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
                RREG32(sec_ded_counter_registers[i]);

fail:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);

        return r;
}
1635
1636 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1637 {
1638         u32 gb_addr_config;
1639         u32 mc_shared_chmap, mc_arb_ramcfg;
1640         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1641         u32 tmp;
1642         int ret;
1643
1644         switch (adev->asic_type) {
1645         case CHIP_TOPAZ:
1646                 adev->gfx.config.max_shader_engines = 1;
1647                 adev->gfx.config.max_tile_pipes = 2;
1648                 adev->gfx.config.max_cu_per_sh = 6;
1649                 adev->gfx.config.max_sh_per_se = 1;
1650                 adev->gfx.config.max_backends_per_se = 2;
1651                 adev->gfx.config.max_texture_channel_caches = 2;
1652                 adev->gfx.config.max_gprs = 256;
1653                 adev->gfx.config.max_gs_threads = 32;
1654                 adev->gfx.config.max_hw_contexts = 8;
1655
1656                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1657                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1658                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1659                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1660                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1661                 break;
1662         case CHIP_FIJI:
1663                 adev->gfx.config.max_shader_engines = 4;
1664                 adev->gfx.config.max_tile_pipes = 16;
1665                 adev->gfx.config.max_cu_per_sh = 16;
1666                 adev->gfx.config.max_sh_per_se = 1;
1667                 adev->gfx.config.max_backends_per_se = 4;
1668                 adev->gfx.config.max_texture_channel_caches = 16;
1669                 adev->gfx.config.max_gprs = 256;
1670                 adev->gfx.config.max_gs_threads = 32;
1671                 adev->gfx.config.max_hw_contexts = 8;
1672
1673                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1674                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1675                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1676                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1677                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1678                 break;
1679         case CHIP_POLARIS11:
1680         case CHIP_POLARIS12:
1681                 ret = amdgpu_atombios_get_gfx_info(adev);
1682                 if (ret)
1683                         return ret;
1684                 adev->gfx.config.max_gprs = 256;
1685                 adev->gfx.config.max_gs_threads = 32;
1686                 adev->gfx.config.max_hw_contexts = 8;
1687
1688                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1689                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1690                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1691                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1692                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1693                 break;
1694         case CHIP_POLARIS10:
1695         case CHIP_VEGAM:
1696                 ret = amdgpu_atombios_get_gfx_info(adev);
1697                 if (ret)
1698                         return ret;
1699                 adev->gfx.config.max_gprs = 256;
1700                 adev->gfx.config.max_gs_threads = 32;
1701                 adev->gfx.config.max_hw_contexts = 8;
1702
1703                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1704                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1705                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1706                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1707                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1708                 break;
1709         case CHIP_TONGA:
1710                 adev->gfx.config.max_shader_engines = 4;
1711                 adev->gfx.config.max_tile_pipes = 8;
1712                 adev->gfx.config.max_cu_per_sh = 8;
1713                 adev->gfx.config.max_sh_per_se = 1;
1714                 adev->gfx.config.max_backends_per_se = 2;
1715                 adev->gfx.config.max_texture_channel_caches = 8;
1716                 adev->gfx.config.max_gprs = 256;
1717                 adev->gfx.config.max_gs_threads = 32;
1718                 adev->gfx.config.max_hw_contexts = 8;
1719
1720                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1721                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1722                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1723                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1724                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1725                 break;
1726         case CHIP_CARRIZO:
1727                 adev->gfx.config.max_shader_engines = 1;
1728                 adev->gfx.config.max_tile_pipes = 2;
1729                 adev->gfx.config.max_sh_per_se = 1;
1730                 adev->gfx.config.max_backends_per_se = 2;
1731                 adev->gfx.config.max_cu_per_sh = 8;
1732                 adev->gfx.config.max_texture_channel_caches = 2;
1733                 adev->gfx.config.max_gprs = 256;
1734                 adev->gfx.config.max_gs_threads = 32;
1735                 adev->gfx.config.max_hw_contexts = 8;
1736
1737                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1738                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1739                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1740                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1741                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1742                 break;
1743         case CHIP_STONEY:
1744                 adev->gfx.config.max_shader_engines = 1;
1745                 adev->gfx.config.max_tile_pipes = 2;
1746                 adev->gfx.config.max_sh_per_se = 1;
1747                 adev->gfx.config.max_backends_per_se = 1;
1748                 adev->gfx.config.max_cu_per_sh = 3;
1749                 adev->gfx.config.max_texture_channel_caches = 2;
1750                 adev->gfx.config.max_gprs = 256;
1751                 adev->gfx.config.max_gs_threads = 16;
1752                 adev->gfx.config.max_hw_contexts = 8;
1753
1754                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1755                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1756                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1757                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1758                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1759                 break;
1760         default:
1761                 adev->gfx.config.max_shader_engines = 2;
1762                 adev->gfx.config.max_tile_pipes = 4;
1763                 adev->gfx.config.max_cu_per_sh = 2;
1764                 adev->gfx.config.max_sh_per_se = 1;
1765                 adev->gfx.config.max_backends_per_se = 2;
1766                 adev->gfx.config.max_texture_channel_caches = 4;
1767                 adev->gfx.config.max_gprs = 256;
1768                 adev->gfx.config.max_gs_threads = 32;
1769                 adev->gfx.config.max_hw_contexts = 8;
1770
1771                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1772                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1773                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1774                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1775                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1776                 break;
1777         }
1778
1779         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1780         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1781         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1782
1783         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1784         adev->gfx.config.mem_max_burst_length_bytes = 256;
1785         if (adev->flags & AMD_IS_APU) {
1786                 /* Get memory bank mapping mode. */
1787                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1788                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1789                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1790
1791                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1792                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1793                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1794
1795                 /* Validate settings in case only one DIMM installed. */
1796                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1797                         dimm00_addr_map = 0;
1798                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1799                         dimm01_addr_map = 0;
1800                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1801                         dimm10_addr_map = 0;
1802                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1803                         dimm11_addr_map = 0;
1804
1805                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1806                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1807                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1808                         adev->gfx.config.mem_row_size_in_kb = 2;
1809                 else
1810                         adev->gfx.config.mem_row_size_in_kb = 1;
1811         } else {
1812                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1813                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1814                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1815                         adev->gfx.config.mem_row_size_in_kb = 4;
1816         }
1817
1818         adev->gfx.config.shader_engine_tile_size = 32;
1819         adev->gfx.config.num_gpus = 1;
1820         adev->gfx.config.multi_gpu_tile_size = 64;
1821
1822         /* fix up row size */
1823         switch (adev->gfx.config.mem_row_size_in_kb) {
1824         case 1:
1825         default:
1826                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1827                 break;
1828         case 2:
1829                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1830                 break;
1831         case 4:
1832                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1833                 break;
1834         }
1835         adev->gfx.config.gb_addr_config = gb_addr_config;
1836
1837         return 0;
1838 }
1839
1840 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1841                                         int mec, int pipe, int queue)
1842 {
1843         int r;
1844         unsigned irq_type;
1845         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1846
1847         ring = &adev->gfx.compute_ring[ring_id];
1848
1849         /* mec0 is me1 */
1850         ring->me = mec + 1;
1851         ring->pipe = pipe;
1852         ring->queue = queue;
1853
1854         ring->ring_obj = NULL;
1855         ring->use_doorbell = true;
1856         ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1857         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1858                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1859         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1860
1861         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1862                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1863                 + ring->pipe;
1864
1865         /* type-2 packets are deprecated on MEC, use type-3 instead */
1866         r = amdgpu_ring_init(adev, ring, 1024,
1867                         &adev->gfx.eop_irq, irq_type);
1868         if (r)
1869                 return r;
1870
1871
1872         return 0;
1873 }
1874
1875 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1876
/*
 * IP-block sw_init callback: register interrupt sources, load firmware,
 * create the RLC/MEC/KIQ buffer objects, and initialize the gfx and
 * compute rings.  Returns 0 on success or the first error encountered;
 * partial allocations are torn down by gfx_v8_0_sw_fini().
 */
static int gfx_v8_0_sw_init(void *handle)
{
        int i, j, k, r, ring_id;
        struct amdgpu_ring *ring;
        struct amdgpu_kiq *kiq;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* number of MECs depends on the ASIC */
        switch (adev->asic_type) {
        case CHIP_TONGA:
        case CHIP_CARRIZO:
        case CHIP_FIJI:
        case CHIP_POLARIS10:
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
        case CHIP_VEGAM:
                adev->gfx.mec.num_mec = 2;
                break;
        case CHIP_TOPAZ:
        case CHIP_STONEY:
        default:
                adev->gfx.mec.num_mec = 1;
                break;
        }

        adev->gfx.mec.num_pipe_per_mec = 4;
        adev->gfx.mec.num_queue_per_pipe = 8;

        /* EOP Event */
        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
        if (r)
                return r;

        /* Privileged reg */
        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
                              &adev->gfx.priv_reg_irq);
        if (r)
                return r;

        /* Privileged inst */
        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
                              &adev->gfx.priv_inst_irq);
        if (r)
                return r;

        /* Add CP EDC/ECC irq  */
        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
                              &adev->gfx.cp_ecc_error_irq);
        if (r)
                return r;

        /* SQ interrupts. */
        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
                              &adev->gfx.sq_irq);
        if (r) {
                DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
                return r;
        }

        /* SQ interrupt messages are handled in process context */
        INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);

        adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

        gfx_v8_0_scratch_init(adev);

        r = gfx_v8_0_init_microcode(adev);
        if (r) {
                DRM_ERROR("Failed to load gfx firmware!\n");
                return r;
        }

        r = adev->gfx.rlc.funcs->init(adev);
        if (r) {
                DRM_ERROR("Failed to init rlc BOs!\n");
                return r;
        }

        /* allocates the HPD EOP BO the compute rings point into below */
        r = gfx_v8_0_mec_init(adev);
        if (r) {
                DRM_ERROR("Failed to init MEC BOs!\n");
                return r;
        }

        /* set up the gfx ring */
        for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
                ring = &adev->gfx.gfx_ring[i];
                ring->ring_obj = NULL;
                sprintf(ring->name, "gfx");
                /* no gfx doorbells on iceland */
                if (adev->asic_type != CHIP_TOPAZ) {
                        ring->use_doorbell = true;
                        ring->doorbell_index = adev->doorbell_index.gfx_ring0;
                }

                r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
                                     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
                if (r)
                        return r;
        }


        /* set up the compute queues - allocate horizontally across pipes */
        ring_id = 0;
        for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
                for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
                        for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
                                /* only init queues acquired in gfx_v8_0_mec_init() */
                                if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
                                        continue;

                                r = gfx_v8_0_compute_ring_init(adev,
                                                                ring_id,
                                                                i, k, j);
                                if (r)
                                        return r;

                                ring_id++;
                        }
                }
        }

        r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
        if (r) {
                DRM_ERROR("Failed to init KIQ BOs!\n");
                return r;
        }

        kiq = &adev->gfx.kiq;
        r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
        if (r)
                return r;

        /* create MQD for all compute queues as well as KIQ for SRIOV case */
        r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
        if (r)
                return r;

        adev->gfx.ce_ram_size = 0x8000;

        r = gfx_v8_0_gpu_early_init(adev);
        if (r)
                return r;

        return 0;
}
2020
/*
 * IP-block sw_fini callback: tear down everything created in
 * gfx_v8_0_sw_init() in reverse dependency order (rings before the MQDs
 * and BOs they reference).  Always returns 0.
 */
static int gfx_v8_0_sw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i;

        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
                amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
        for (i = 0; i < adev->gfx.num_compute_rings; i++)
                amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

        amdgpu_gfx_mqd_sw_fini(adev);
        amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
        amdgpu_gfx_kiq_fini(adev);

        gfx_v8_0_mec_fini(adev);
        amdgpu_gfx_rlc_fini(adev);
        amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
                                &adev->gfx.rlc.clear_state_gpu_addr,
                                (void **)&adev->gfx.rlc.cs_ptr);
        /* the CP table BO only exists on the APUs that allocated it */
        if ((adev->asic_type == CHIP_CARRIZO) ||
            (adev->asic_type == CHIP_STONEY)) {
                amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
                                &adev->gfx.rlc.cp_table_gpu_addr,
                                (void **)&adev->gfx.rlc.cp_table_ptr);
        }
        gfx_v8_0_free_microcode(adev);

        return 0;
}
2050
2051 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2052 {
2053         uint32_t *modearray, *mod2array;
2054         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2055         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2056         u32 reg_offset;
2057
2058         modearray = adev->gfx.config.tile_mode_array;
2059         mod2array = adev->gfx.config.macrotile_mode_array;
2060
2061         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2062                 modearray[reg_offset] = 0;
2063
2064         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2065                 mod2array[reg_offset] = 0;
2066
2067         switch (adev->asic_type) {
2068         case CHIP_TOPAZ:
2069                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2070                                 PIPE_CONFIG(ADDR_SURF_P2) |
2071                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2072                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2073                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2074                                 PIPE_CONFIG(ADDR_SURF_P2) |
2075                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2076                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2077                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2078                                 PIPE_CONFIG(ADDR_SURF_P2) |
2079                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2080                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2081                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2082                                 PIPE_CONFIG(ADDR_SURF_P2) |
2083                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2084                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2085                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2086                                 PIPE_CONFIG(ADDR_SURF_P2) |
2087                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2088                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2089                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2090                                 PIPE_CONFIG(ADDR_SURF_P2) |
2091                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2092                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2093                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2094                                 PIPE_CONFIG(ADDR_SURF_P2) |
2095                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2096                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2097                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2098                                 PIPE_CONFIG(ADDR_SURF_P2));
2099                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2100                                 PIPE_CONFIG(ADDR_SURF_P2) |
2101                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2102                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2103                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2104                                  PIPE_CONFIG(ADDR_SURF_P2) |
2105                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2106                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2107                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2108                                  PIPE_CONFIG(ADDR_SURF_P2) |
2109                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2110                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2111                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2112                                  PIPE_CONFIG(ADDR_SURF_P2) |
2113                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2114                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2115                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2116                                  PIPE_CONFIG(ADDR_SURF_P2) |
2117                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2118                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2119                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2120                                  PIPE_CONFIG(ADDR_SURF_P2) |
2121                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2122                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2123                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2124                                  PIPE_CONFIG(ADDR_SURF_P2) |
2125                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2126                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2127                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2128                                  PIPE_CONFIG(ADDR_SURF_P2) |
2129                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2130                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2131                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2132                                  PIPE_CONFIG(ADDR_SURF_P2) |
2133                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2134                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2135                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2136                                  PIPE_CONFIG(ADDR_SURF_P2) |
2137                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2138                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2139                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2140                                  PIPE_CONFIG(ADDR_SURF_P2) |
2141                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2142                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2143                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2144                                  PIPE_CONFIG(ADDR_SURF_P2) |
2145                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2146                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2147                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2148                                  PIPE_CONFIG(ADDR_SURF_P2) |
2149                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2150                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2151                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2152                                  PIPE_CONFIG(ADDR_SURF_P2) |
2153                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2154                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2155                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2156                                  PIPE_CONFIG(ADDR_SURF_P2) |
2157                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2158                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2159                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2160                                  PIPE_CONFIG(ADDR_SURF_P2) |
2161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2163                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2164                                  PIPE_CONFIG(ADDR_SURF_P2) |
2165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2167                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2168                                  PIPE_CONFIG(ADDR_SURF_P2) |
2169                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2170                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2171
2172                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2173                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2174                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2175                                 NUM_BANKS(ADDR_SURF_8_BANK));
2176                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2177                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2178                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2179                                 NUM_BANKS(ADDR_SURF_8_BANK));
2180                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2181                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2182                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2183                                 NUM_BANKS(ADDR_SURF_8_BANK));
2184                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2185                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2186                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2187                                 NUM_BANKS(ADDR_SURF_8_BANK));
2188                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2189                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2190                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2191                                 NUM_BANKS(ADDR_SURF_8_BANK));
2192                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2193                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2194                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2195                                 NUM_BANKS(ADDR_SURF_8_BANK));
2196                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2197                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2198                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2199                                 NUM_BANKS(ADDR_SURF_8_BANK));
2200                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2201                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2202                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2203                                 NUM_BANKS(ADDR_SURF_16_BANK));
2204                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2205                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2206                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2207                                 NUM_BANKS(ADDR_SURF_16_BANK));
2208                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2209                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2210                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2211                                  NUM_BANKS(ADDR_SURF_16_BANK));
2212                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2213                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2214                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2215                                  NUM_BANKS(ADDR_SURF_16_BANK));
2216                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2217                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2218                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2219                                  NUM_BANKS(ADDR_SURF_16_BANK));
2220                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2221                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2222                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2223                                  NUM_BANKS(ADDR_SURF_16_BANK));
2224                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2225                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2226                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2227                                  NUM_BANKS(ADDR_SURF_8_BANK));
2228
2229                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2230                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2231                             reg_offset != 23)
2232                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2233
2234                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2235                         if (reg_offset != 7)
2236                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2237
2238                 break;
2239         case CHIP_FIJI:
2240         case CHIP_VEGAM:
2241                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2242                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2243                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2244                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2245                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2246                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2247                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2248                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2249                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2250                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2251                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2252                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2253                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2254                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2255                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2256                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2257                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2258                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2259                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2260                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2261                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2262                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2263                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2264                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2265                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2266                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2267                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2268                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2269                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2270                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2271                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2272                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2273                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2274                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2275                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2276                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2277                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2278                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2279                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2281                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2282                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2283                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2284                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2285                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2286                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2287                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2288                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2289                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2290                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2291                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2292                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2295                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2296                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2299                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2300                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2303                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2304                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2307                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2308                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2311                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2312                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2315                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2316                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2319                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2320                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2323                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2324                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2327                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2328                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2331                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2332                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2335                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2336                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2339                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2340                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2343                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2344                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2347                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2348                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2350                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2351                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2352                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2354                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2355                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2356                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2357                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2358                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2359                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2360                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2361                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2362                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2363
2364                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2366                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2367                                 NUM_BANKS(ADDR_SURF_8_BANK));
2368                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2370                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2371                                 NUM_BANKS(ADDR_SURF_8_BANK));
2372                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2373                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2374                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2375                                 NUM_BANKS(ADDR_SURF_8_BANK));
2376                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2377                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2378                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2379                                 NUM_BANKS(ADDR_SURF_8_BANK));
2380                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2381                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2382                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2383                                 NUM_BANKS(ADDR_SURF_8_BANK));
2384                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2386                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2387                                 NUM_BANKS(ADDR_SURF_8_BANK));
2388                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2389                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2390                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2391                                 NUM_BANKS(ADDR_SURF_8_BANK));
2392                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2394                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2395                                 NUM_BANKS(ADDR_SURF_8_BANK));
2396                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2398                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2399                                 NUM_BANKS(ADDR_SURF_8_BANK));
2400                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2401                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2402                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2403                                  NUM_BANKS(ADDR_SURF_8_BANK));
2404                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2406                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2407                                  NUM_BANKS(ADDR_SURF_8_BANK));
2408                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2410                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2411                                  NUM_BANKS(ADDR_SURF_8_BANK));
2412                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2413                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2414                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2415                                  NUM_BANKS(ADDR_SURF_8_BANK));
2416                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2418                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2419                                  NUM_BANKS(ADDR_SURF_4_BANK));
2420
2421                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2422                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2423
2424                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2425                         if (reg_offset != 7)
2426                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2427
2428                 break;
2429         case CHIP_TONGA:
2430                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2432                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2433                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2434                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2435                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2436                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2437                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2438                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2440                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2441                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2442                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2443                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2444                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2445                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2446                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2447                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2448                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2449                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2450                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2451                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2452                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2453                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2454                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2455                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2456                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2457                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2458                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2459                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2460                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2461                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2462                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2463                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2464                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2465                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2466                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2467                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2468                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2469                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2470                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2471                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2472                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2473                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2474                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2475                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2476                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2477                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2478                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2479                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2480                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2481                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2482                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2483                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2484                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2485                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2486                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2487                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2488                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2489                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2490                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2491                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2493                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2494                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2495                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2496                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2497                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2498                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2499                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2500                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2501                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2502                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2503                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2504                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2505                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2506                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2507                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2508                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2509                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2510                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2511                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2512                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2513                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2515                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2516                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2517                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2519                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2520                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2521                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2522                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2523                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2524                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2525                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2527                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2528                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2529                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2531                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2532                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2533                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2535                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2536                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2537                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2538                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2539                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2542                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2543                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2544                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2545                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2547                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2548                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2549                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2550                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2551                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2552
2553                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2555                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2556                                 NUM_BANKS(ADDR_SURF_16_BANK));
2557                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2559                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2560                                 NUM_BANKS(ADDR_SURF_16_BANK));
2561                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2562                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2563                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2564                                 NUM_BANKS(ADDR_SURF_16_BANK));
2565                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2567                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2568                                 NUM_BANKS(ADDR_SURF_16_BANK));
2569                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2571                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2572                                 NUM_BANKS(ADDR_SURF_16_BANK));
2573                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2575                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2576                                 NUM_BANKS(ADDR_SURF_16_BANK));
2577                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2579                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2580                                 NUM_BANKS(ADDR_SURF_16_BANK));
2581                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2583                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2584                                 NUM_BANKS(ADDR_SURF_16_BANK));
2585                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2586                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2587                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2588                                 NUM_BANKS(ADDR_SURF_16_BANK));
2589                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2591                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2592                                  NUM_BANKS(ADDR_SURF_16_BANK));
2593                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2594                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2595                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2596                                  NUM_BANKS(ADDR_SURF_16_BANK));
2597                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2599                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2600                                  NUM_BANKS(ADDR_SURF_8_BANK));
2601                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2603                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2604                                  NUM_BANKS(ADDR_SURF_4_BANK));
2605                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2606                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2607                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2608                                  NUM_BANKS(ADDR_SURF_4_BANK));
2609
2610                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2611                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2612
2613                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2614                         if (reg_offset != 7)
2615                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2616
2617                 break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/*
		 * Tile mode table for Polaris11/12: every entry uses the
		 * ADDR_SURF_P4_16x16 pipe configuration (these parts have a
		 * narrower pipe setup than Polaris10, which programs
		 * P8_32x32_16x16 for most of its entries).
		 *
		 * modearray[i] is written verbatim to GB_TILE_MODE<i> by the
		 * first WREG32 loop below. Entries 0-7 are depth-surface
		 * modes at increasing tile splits, 8 is linear-aligned,
		 * 9-12 display, 13-18 thin, 19-26 thick/xthick, 27-30 rotated.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macro tile (bank) table: mod2array[i] is written to
		 * GB_MACROTILE_MODE<i> by the second WREG32 loop below.
		 * Index 7 is intentionally never assigned here, matching the
		 * "reg_offset != 7" skip in that loop, so the register keeps
		 * its existing value.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program every tile mode register from the table above. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program the macrotile registers, leaving index 7 untouched. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2821         case CHIP_POLARIS10:
2822                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2823                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2824                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2825                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2826                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2827                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2828                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2829                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2830                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2831                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2832                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2833                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2834                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2835                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2836                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2837                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2838                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2839                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2840                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2841                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2842                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2843                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2844                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2845                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2846                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2847                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2848                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2849                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2850                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2851                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2852                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2853                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2854                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2855                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2856                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2857                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2858                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2859                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2860                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2861                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2862                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2863                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2864                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2865                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2866                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2867                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2868                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2869                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2870                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2871                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2872                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2873                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2874                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2875                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2876                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2877                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2878                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2879                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2880                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2881                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2882                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2883                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2884                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2885                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2886                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2887                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2888                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2889                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2890                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2891                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2892                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2893                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2894                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2895                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2896                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2897                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2898                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2899                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2900                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2901                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2902                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2903                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2904                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2905                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2907                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2908                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2909                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2911                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2912                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2913                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2914                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2915                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2916                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2917                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2919                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2920                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2921                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2923                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2924                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2925                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2926                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2927                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2928                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2929                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2930                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2931                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2932                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2934                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2935                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2938                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2939                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2940                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2941                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2942                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2943                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2944
2945                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2946                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2947                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2948                                 NUM_BANKS(ADDR_SURF_16_BANK));
2949
2950                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2951                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2952                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2953                                 NUM_BANKS(ADDR_SURF_16_BANK));
2954
2955                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2957                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2958                                 NUM_BANKS(ADDR_SURF_16_BANK));
2959
2960                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2961                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2962                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963                                 NUM_BANKS(ADDR_SURF_16_BANK));
2964
2965                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2966                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2967                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2968                                 NUM_BANKS(ADDR_SURF_16_BANK));
2969
2970                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2971                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2972                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2973                                 NUM_BANKS(ADDR_SURF_16_BANK));
2974
2975                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2976                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2977                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2978                                 NUM_BANKS(ADDR_SURF_16_BANK));
2979
2980                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2982                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983                                 NUM_BANKS(ADDR_SURF_16_BANK));
2984
2985                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2986                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2987                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2988                                 NUM_BANKS(ADDR_SURF_16_BANK));
2989
2990                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2991                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2992                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2993                                 NUM_BANKS(ADDR_SURF_16_BANK));
2994
2995                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2998                                 NUM_BANKS(ADDR_SURF_16_BANK));
2999
3000                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3001                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3002                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3003                                 NUM_BANKS(ADDR_SURF_8_BANK));
3004
3005                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3006                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3007                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3008                                 NUM_BANKS(ADDR_SURF_4_BANK));
3009
3010                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3011                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3012                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3013                                 NUM_BANKS(ADDR_SURF_4_BANK));
3014
3015                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3016                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3017
3018                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3019                         if (reg_offset != 7)
3020                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3021
3022                 break;
3023         case CHIP_STONEY:
3024                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3025                                 PIPE_CONFIG(ADDR_SURF_P2) |
3026                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3027                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3028                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3029                                 PIPE_CONFIG(ADDR_SURF_P2) |
3030                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3031                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3032                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3033                                 PIPE_CONFIG(ADDR_SURF_P2) |
3034                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3035                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3036                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3037                                 PIPE_CONFIG(ADDR_SURF_P2) |
3038                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3039                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3040                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3041                                 PIPE_CONFIG(ADDR_SURF_P2) |
3042                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3043                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3044                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3045                                 PIPE_CONFIG(ADDR_SURF_P2) |
3046                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3047                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3048                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3049                                 PIPE_CONFIG(ADDR_SURF_P2) |
3050                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3051                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3052                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3053                                 PIPE_CONFIG(ADDR_SURF_P2));
3054                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3055                                 PIPE_CONFIG(ADDR_SURF_P2) |
3056                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3057                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3058                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3059                                  PIPE_CONFIG(ADDR_SURF_P2) |
3060                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3061                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3062                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3063                                  PIPE_CONFIG(ADDR_SURF_P2) |
3064                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3065                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3066                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3067                                  PIPE_CONFIG(ADDR_SURF_P2) |
3068                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3069                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3070                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3071                                  PIPE_CONFIG(ADDR_SURF_P2) |
3072                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3073                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3074                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3075                                  PIPE_CONFIG(ADDR_SURF_P2) |
3076                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3077                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3078                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3079                                  PIPE_CONFIG(ADDR_SURF_P2) |
3080                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3081                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3082                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3083                                  PIPE_CONFIG(ADDR_SURF_P2) |
3084                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3085                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3086                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3087                                  PIPE_CONFIG(ADDR_SURF_P2) |
3088                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3089                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3090                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3091                                  PIPE_CONFIG(ADDR_SURF_P2) |
3092                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3093                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3094                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3095                                  PIPE_CONFIG(ADDR_SURF_P2) |
3096                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3097                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3098                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3099                                  PIPE_CONFIG(ADDR_SURF_P2) |
3100                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3101                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3102                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3103                                  PIPE_CONFIG(ADDR_SURF_P2) |
3104                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3105                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3106                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3107                                  PIPE_CONFIG(ADDR_SURF_P2) |
3108                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3109                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3110                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3111                                  PIPE_CONFIG(ADDR_SURF_P2) |
3112                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3113                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3114                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3115                                  PIPE_CONFIG(ADDR_SURF_P2) |
3116                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3117                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3118                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3119                                  PIPE_CONFIG(ADDR_SURF_P2) |
3120                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3121                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3122                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3123                                  PIPE_CONFIG(ADDR_SURF_P2) |
3124                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3125                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3126
3127                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3128                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3129                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3130                                 NUM_BANKS(ADDR_SURF_8_BANK));
3131                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3132                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3133                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3134                                 NUM_BANKS(ADDR_SURF_8_BANK));
3135                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3136                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3137                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3138                                 NUM_BANKS(ADDR_SURF_8_BANK));
3139                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3140                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3141                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3142                                 NUM_BANKS(ADDR_SURF_8_BANK));
3143                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3144                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3145                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3146                                 NUM_BANKS(ADDR_SURF_8_BANK));
3147                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150                                 NUM_BANKS(ADDR_SURF_8_BANK));
3151                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3152                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3153                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3154                                 NUM_BANKS(ADDR_SURF_8_BANK));
3155                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3156                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3157                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3158                                 NUM_BANKS(ADDR_SURF_16_BANK));
3159                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3160                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3161                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3162                                 NUM_BANKS(ADDR_SURF_16_BANK));
3163                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3164                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3165                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3166                                  NUM_BANKS(ADDR_SURF_16_BANK));
3167                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3168                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3169                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3170                                  NUM_BANKS(ADDR_SURF_16_BANK));
3171                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3172                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3173                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3174                                  NUM_BANKS(ADDR_SURF_16_BANK));
3175                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3176                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3177                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3178                                  NUM_BANKS(ADDR_SURF_16_BANK));
3179                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3180                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3181                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3182                                  NUM_BANKS(ADDR_SURF_8_BANK));
3183
3184                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3185                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3186                             reg_offset != 23)
3187                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3188
3189                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3190                         if (reg_offset != 7)
3191                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3192
3193                 break;
3194         default:
3195                 dev_warn(adev->dev,
3196                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3197                          adev->asic_type);
3198                 /* fall through */
3199
3200         case CHIP_CARRIZO:
3201                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3202                                 PIPE_CONFIG(ADDR_SURF_P2) |
3203                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3204                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3205                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3206                                 PIPE_CONFIG(ADDR_SURF_P2) |
3207                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3208                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3209                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3210                                 PIPE_CONFIG(ADDR_SURF_P2) |
3211                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3212                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3213                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3214                                 PIPE_CONFIG(ADDR_SURF_P2) |
3215                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3216                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3217                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3218                                 PIPE_CONFIG(ADDR_SURF_P2) |
3219                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3220                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3221                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3222                                 PIPE_CONFIG(ADDR_SURF_P2) |
3223                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3224                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3225                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3226                                 PIPE_CONFIG(ADDR_SURF_P2) |
3227                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3228                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3229                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3230                                 PIPE_CONFIG(ADDR_SURF_P2));
3231                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3232                                 PIPE_CONFIG(ADDR_SURF_P2) |
3233                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3234                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3236                                  PIPE_CONFIG(ADDR_SURF_P2) |
3237                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3238                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3239                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3240                                  PIPE_CONFIG(ADDR_SURF_P2) |
3241                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3242                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3243                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3244                                  PIPE_CONFIG(ADDR_SURF_P2) |
3245                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3246                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3248                                  PIPE_CONFIG(ADDR_SURF_P2) |
3249                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3250                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3251                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3252                                  PIPE_CONFIG(ADDR_SURF_P2) |
3253                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3254                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3255                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3256                                  PIPE_CONFIG(ADDR_SURF_P2) |
3257                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3258                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3259                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3260                                  PIPE_CONFIG(ADDR_SURF_P2) |
3261                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3262                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3263                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3264                                  PIPE_CONFIG(ADDR_SURF_P2) |
3265                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3266                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3267                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3268                                  PIPE_CONFIG(ADDR_SURF_P2) |
3269                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3270                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3271                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3272                                  PIPE_CONFIG(ADDR_SURF_P2) |
3273                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3274                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3275                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3276                                  PIPE_CONFIG(ADDR_SURF_P2) |
3277                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3278                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3279                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3280                                  PIPE_CONFIG(ADDR_SURF_P2) |
3281                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3282                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3283                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3284                                  PIPE_CONFIG(ADDR_SURF_P2) |
3285                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3286                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3287                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3288                                  PIPE_CONFIG(ADDR_SURF_P2) |
3289                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3290                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3291                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3292                                  PIPE_CONFIG(ADDR_SURF_P2) |
3293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3295                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3296                                  PIPE_CONFIG(ADDR_SURF_P2) |
3297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3299                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3300                                  PIPE_CONFIG(ADDR_SURF_P2) |
3301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3303
3304                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3305                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3306                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307                                 NUM_BANKS(ADDR_SURF_8_BANK));
3308                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3309                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3310                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3311                                 NUM_BANKS(ADDR_SURF_8_BANK));
3312                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3313                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3314                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3315                                 NUM_BANKS(ADDR_SURF_8_BANK));
3316                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3318                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3319                                 NUM_BANKS(ADDR_SURF_8_BANK));
3320                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3321                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3322                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3323                                 NUM_BANKS(ADDR_SURF_8_BANK));
3324                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3326                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3327                                 NUM_BANKS(ADDR_SURF_8_BANK));
3328                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3329                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3330                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3331                                 NUM_BANKS(ADDR_SURF_8_BANK));
3332                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3333                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3334                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3335                                 NUM_BANKS(ADDR_SURF_16_BANK));
3336                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3337                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3338                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3339                                 NUM_BANKS(ADDR_SURF_16_BANK));
3340                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3341                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3342                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3343                                  NUM_BANKS(ADDR_SURF_16_BANK));
3344                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3345                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3346                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3347                                  NUM_BANKS(ADDR_SURF_16_BANK));
3348                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3349                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3350                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3351                                  NUM_BANKS(ADDR_SURF_16_BANK));
3352                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3353                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3354                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355                                  NUM_BANKS(ADDR_SURF_16_BANK));
3356                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3357                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3358                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3359                                  NUM_BANKS(ADDR_SURF_8_BANK));
3360
3361                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3362                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3363                             reg_offset != 23)
3364                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3365
3366                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3367                         if (reg_offset != 7)
3368                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3369
3370                 break;
3371         }
3372 }
3373
3374 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3375                                   u32 se_num, u32 sh_num, u32 instance)
3376 {
3377         u32 data;
3378
3379         if (instance == 0xffffffff)
3380                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3381         else
3382                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3383
3384         if (se_num == 0xffffffff)
3385                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3386         else
3387                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3388
3389         if (sh_num == 0xffffffff)
3390                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3391         else
3392                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3393
3394         WREG32(mmGRBM_GFX_INDEX, data);
3395 }
3396
/* Select a specific ME/pipe/queue/vmid for subsequent register access.
 * On VI this is done through the SRBM (vi_srbm_select) rather than
 * GRBM_GFX_INDEX.
 */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q, u32 vm)
{
	vi_srbm_select(adev, me, pipe, q, vm);
}
3402
3403 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3404 {
3405         u32 data, mask;
3406
3407         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3408                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3409
3410         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3411
3412         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3413                                          adev->gfx.config.max_sh_per_se);
3414
3415         return (~data) & mask;
3416 }
3417
/*
 * gfx_v8_0_raster_config - per-ASIC golden PA_SC_RASTER_CONFIG values
 *
 * ORs the recommended raster configuration for the detected ASIC into
 * *rconf (PA_SC_RASTER_CONFIG) and *rconf1 (PA_SC_RASTER_CONFIG_1).
 * The values encode how render backends are mapped across packers and
 * shader engines; they are fixed per chip family.
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_VEGAM:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		/* single-RB APU: defaults are fine */
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
3458
/*
 * gfx_v8_0_write_harvested_raster_configs - adjust raster config for
 * harvested (partially disabled) render backends
 *
 * @adev: amdgpu_device pointer
 * @raster_config: golden PA_SC_RASTER_CONFIG value for a full chip
 * @raster_config_1: golden PA_SC_RASTER_CONFIG_1 value for a full chip
 * @rb_mask: bitmap of actually enabled RBs
 * @num_rb: number of RB "slots" (may exceed popcount of rb_mask)
 *
 * When some RBs are fused off, the default SE/PKR/RB mapping would
 * route work to missing backends.  This walks each shader engine and
 * rewrites the SE_MAP/PKR_MAP/RB_MAP fields so that rasterization only
 * targets RBs present in @rb_mask, then writes the per-SE config.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice rb_mask into one sub-mask per shader engine. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* The remap logic below only supports these topologies. */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If one SE pair is entirely harvested, point the pair map at
	 * the surviving pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* If one SE of this pair is empty, map all work to the
		 * populated one. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same idea one level down: steer around an empty packer. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* And finally around a missing RB within each packer. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3567
/*
 * gfx_v8_0_setup_rb - discover active render backends and program
 * the raster configuration
 *
 * Builds the global active-RB bitmap by probing each SE/SH, writes
 * either the golden raster config (no harvesting, or enough RBs) or a
 * harvest-adjusted one, then caches the per-SE/SH register values for
 * userspace queries.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Probe every shader array and pack its RB bitmap into a
	 * single global bitmap. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* Full complement of RBs (or none detected): broadcast the
	 * golden config; otherwise remap around the harvested RBs. */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3624
3625 /**
3626  * gfx_v8_0_init_compute_vmid - gart enable
3627  *
3628  * @adev: amdgpu_device pointer
3629  *
3630  * Initialize compute vmid sh_mem registers
3631  *
3632  */
3633 #define DEFAULT_SH_MEM_BASES    (0x6000)
3634 #define FIRST_COMPUTE_VMID      (8)
3635 #define LAST_COMPUTE_VMID       (16)
3636 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3637 {
3638         int i;
3639         uint32_t sh_mem_config;
3640         uint32_t sh_mem_bases;
3641
3642         /*
3643          * Configure apertures:
3644          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3645          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3646          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3647          */
3648         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3649
3650         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3651                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3652                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3653                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3654                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3655                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3656
3657         mutex_lock(&adev->srbm_mutex);
3658         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3659                 vi_srbm_select(adev, 0, 0, 0, i);
3660                 /* CP and shaders */
3661                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3662                 WREG32(mmSH_MEM_APE1_BASE, 1);
3663                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3664                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3665         }
3666         vi_srbm_select(adev, 0, 0, 0, 0);
3667         mutex_unlock(&adev->srbm_mutex);
3668
3669         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
3670            acccess. These should be enabled by FW for target VMIDs. */
3671         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3672                 WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3673                 WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3674                 WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3675                 WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3676         }
3677 }
3678
3679 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3680 {
3681         switch (adev->asic_type) {
3682         default:
3683                 adev->gfx.config.double_offchip_lds_buf = 1;
3684                 break;
3685         case CHIP_CARRIZO:
3686         case CHIP_STONEY:
3687                 adev->gfx.config.double_offchip_lds_buf = 0;
3688                 break;
3689         }
3690 }
3691
/*
 * gfx_v8_0_constants_init - one-time "golden" GFX register setup
 *
 * Programs address config, tiling tables, RB/CU setup, per-VMID
 * SH_MEM apertures and various fixed FIFO/arbitration values.  Called
 * once during hw init; ordering of the sub-steps matters (tiling and
 * RB setup must precede CU info collection).
 */
static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	/* Program SH_MEM apertures for every VMID; VMID 0 (kernel) gets
	 * UC default MTYPE and zero bases, the rest get NC default and
	 * the shared aperture base. */
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			/* SH_MEM_BASES takes the top 16 bits of the
			 * 64-bit shared aperture address. */
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3773
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for RLC serdes masters to go idle
 *
 * Polls the per-CU serdes busy register on every SE/SH (up to
 * adev->usec_timeout microseconds each), then polls the non-CU master
 * busy bits.  On timeout it restores broadcast mode, drops the mutex
 * and returns after logging; it does not report an error to callers.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* restore broadcast before bailing out */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Now wait for the global (non-CU) serdes masters. */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3811
3812 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3813                                                bool enable)
3814 {
3815         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3816
3817         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3818         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3819         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3820         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3821
3822         WREG32(mmCP_INT_CNTL_RING0, tmp);
3823 }
3824
/* Point the RLC at the clear-state indirect buffer (CSIB): program its
 * 64-bit GPU address (low bits forced to a 4-byte boundary) and size. */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}
3835
/*
 * gfx_v8_0_parse_ind_reg_list - parse the RLC indirect register list
 *
 * @register_list_format: firmware register list; entries holding index
 *	register values are REWRITTEN IN PLACE to hold the position of
 *	that value in @unique_indices
 * @ind_offset: offset into the list where the indirect section starts
 * @list_size: total number of entries in the list
 * @unique_indices: out, deduplicated index register values
 * @indices_count: in/out, number of entries used in @unique_indices
 * @max_indices: capacity of @unique_indices (BUG on overflow)
 * @ind_start_offsets: out, start offset of each sub-list
 * @offset_count: in/out, number of entries used in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets (BUG on overflow)
 *
 * Sub-lists are terminated by a 0xFFFFFFFF entry.  Within a sub-list,
 * every third entry (after skipping two) is treated as an index
 * register value to deduplicate.
 *
 * NOTE(review): ind_offset += 2 is not bounds-checked against
 * list_size before the subsequent read — assumes well-formed firmware
 * data; verify against the RLC ucode format.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			/* record where this sub-list begins */
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			/* sub-list terminator */
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			/* first time we see this value: append it */
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw value with its dedup table position */
		register_list_format[ind_offset] = indices;
	}
}
3885
/*
 * gfx_v8_0_init_save_restore_list - upload the RLC save/restore lists
 *
 * Parses the firmware's register list format into unique index
 * registers and sub-list start offsets (on a kmemdup'd copy, since
 * parsing rewrites entries in place), then uploads the direct restore
 * list to ARAM and the indirect list, list size and start offsets to
 * GPM scratch, and finally programs the unique index registers.
 *
 * Returns 0 on success, -ENOMEM if the list copy cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmemdup(adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* list size is recorded in 64-bit units (dwords / 2) */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	/* low bits carry the register offset, high bits the data value
	 * (split per the RLC SRM index register layout) */
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
3948
/* Turn on the RLC save/restore machine (used for power gating). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3953
/* Program the fixed power-gating delay/threshold values used by the
 * RLC (poll count, PG sequencing delays, auto-PG idle threshold). */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	/* power up/down sequencing delays, all 0x10 */
	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
3970
/* Enable/disable SMU clock slowdown while powering up (Carrizo PG). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
3976
/* Enable/disable SMU clock slowdown while powering down (Carrizo PG). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
3982
/* Enable CP power gating (note the field is a *disable* bit, hence the
 * inverted value). */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
3987
3988 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3989 {
3990         if ((adev->asic_type == CHIP_CARRIZO) ||
3991             (adev->asic_type == CHIP_STONEY)) {
3992                 gfx_v8_0_init_csb(adev);
3993                 gfx_v8_0_init_save_restore_list(adev);
3994                 gfx_v8_0_enable_save_restore_machine(adev);
3995                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3996                 gfx_v8_0_init_power_gating(adev);
3997                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3998         } else if ((adev->asic_type == CHIP_POLARIS11) ||
3999                    (adev->asic_type == CHIP_POLARIS12) ||
4000                    (adev->asic_type == CHIP_VEGAM)) {
4001                 gfx_v8_0_init_csb(adev);
4002                 gfx_v8_0_init_save_restore_list(adev);
4003                 gfx_v8_0_enable_save_restore_machine(adev);
4004                 gfx_v8_0_init_power_gating(adev);
4005         }
4006
4007 }
4008
/* Halt the RLC: disable its F32 core, mask the GUI idle interrupts and
 * wait for outstanding serdes transactions to drain. */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4016
/* Pulse the RLC soft reset bit, with 50us settle time on each edge. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4025
/* Start the RLC F32 core; on dGPUs also re-enable the GUI idle
 * interrupts (APUs enable them later, after the CP is initialized). */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4036
/*
 * gfx_v8_0_rlc_resume - bring the RLC back up
 *
 * Under SR-IOV the host owns the RLC, so only the clear-state buffer
 * is (re)programmed.  Bare metal does a full stop/reset, power-gating
 * init and restart via the rlc funcs table.  Always returns 0.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev)) {
		gfx_v8_0_init_csb(adev);
		return 0;
	}

	adev->gfx.rlc.funcs->stop(adev);
	adev->gfx.rlc.funcs->reset(adev);
	gfx_v8_0_init_pg(adev);
	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
4051
4052 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4053 {
4054         int i;
4055         u32 tmp = RREG32(mmCP_ME_CNTL);
4056
4057         if (enable) {
4058                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4059                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4060                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4061         } else {
4062                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4063                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4064                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4065                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4066                         adev->gfx.gfx_ring[i].sched.ready = false;
4067         }
4068         WREG32(mmCP_ME_CNTL, tmp);
4069         udelay(50);
4070 }
4071
4072 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4073 {
4074         u32 count = 0;
4075         const struct cs_section_def *sect = NULL;
4076         const struct cs_extent_def *ext = NULL;
4077
4078         /* begin clear state */
4079         count += 2;
4080         /* context control state */
4081         count += 3;
4082
4083         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4084                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4085                         if (sect->id == SECT_CONTEXT)
4086                                 count += 2 + ext->reg_count;
4087                         else
4088                                 return 0;
4089                 }
4090         }
4091         /* pa_sc_raster_config/pa_sc_raster_config1 */
4092         count += 4;
4093         /* end clear state */
4094         count += 2;
4095         /* clear state */
4096         count += 2;
4097
4098         return count;
4099 }
4100
/* Initialize the gfx CP and emit the initial clear-state / CE-partition
 * packet stream on gfx ring 0.  The ring allocation size must match
 * gfx_v8_0_get_csb_size() (+4 for the SET_BASE packet).
 *
 * Returns 0 on success or the amdgpu_ring_alloc() error.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent from the VI clear-state table */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* program the raster config pair from the SE0/RB0 harvest config */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/* Configure the gfx ring's doorbell: enable/disable it in
 * CP_RB_DOORBELL_CONTROL and, on dGPUs, program the doorbell aperture
 * range registers. */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* the range registers below are only programmed on dGPUs */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					adev->doorbell_index.gfx_ring0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	/* open the upper bound of the range all the way */
	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4197
/* Program the gfx ring 0 ring-buffer registers (size, rptr/wptr
 * writeback addresses, base address), set up its doorbell, and start
 * the ring with the clear-state stream.  Always returns 0 (the
 * gfx_v8_0_cp_gfx_start() result is not propagated here; ring health
 * is checked later by gfx_v8_0_cp_test_all_rings()). */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA is set temporarily so the rptr can be reset */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* restore CNTL without RB_RPTR_WR_ENA */
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base address, 256-byte aligned */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->sched.ready = true;

	return 0;
}
4251
/* Enable or halt both MEC microengines via CP_MEC_CNTL.  When halting,
 * all compute rings and the KIQ ring are marked not ready. */
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].sched.ready = false;
		adev->gfx.kiq.ring.sched.ready = false;
	}
	udelay(50);
}
4266
4267 /* KIQ functions */
/* KIQ functions */
/* Tell the RLC which me/pipe/queue is the KIQ.  The queue id is written
 * first, then written again with bit 7 (0x80) set — two separate writes
 * to RLC_CP_SCHEDULERS, matching the required programming sequence. */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4281
/* Submit SET_RESOURCES plus one MAP_QUEUES packet per compute ring on
 * the KIQ, handing the enabled queue mask to the CP scheduler and
 * mapping every KCQ's MQD.
 *
 * Returns 0 on success or the amdgpu_ring_alloc() error.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	/* build the bitmask of enabled MEC queues */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	/* 8 dwords for SET_RESOURCES + 8 per MAP_QUEUES packet */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}

	amdgpu_ring_commit(kiq_ring);

	return 0;
}
4342
/* Deactivate the currently SRBM-selected HQD: issue a dequeue request
 * of type @req if the queue is active, poll up to adev->usec_timeout
 * usecs for it to go inactive, then clear the request and the PQ
 * read/write pointers.
 *
 * Returns 0 on success, -ETIMEDOUT if the queue stayed active.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	/* clear the request and reset the queue pointers even on timeout */
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4363
/* Fill @ring's MQD (memory queue descriptor) with the HQD register
 * values that gfx_v8_0_mqd_commit() later programs into hardware.
 * Callers in this file select the queue's SRBM (me/pipe/queue) before
 * calling, since several fields are seeded from the current HQD
 * registers.  Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* enable all SEs for static thread management */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* point at the dynamic_cu_mask stored in this MQD's own allocation */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: capture the remaining HQD registers as-is */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4506
/* Program the currently SRBM-selected HQD from @mqd.  Registers are
 * written in a specific order: VMID..EOP_CONTROL first, then (except on
 * Tonga) the EOP pointers, then EOP_EVENTS..ERROR, and finally
 * MQD_BASE_ADDR..ACTIVE so the queue only activates once everything
 * else is in place.  Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4543
/* Initialize (or, after a GPU reset, restore) the KIQ's MQD and commit
 * it to hardware under the SRBM lock.  On first init the MQD is built
 * from scratch and backed up; on reset it is restored from the backup
 * and the ring buffer is cleared.  Always returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* the KIQ backup lives one slot past the compute-ring backups */
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a CPU copy so a later GPU reset can restore it */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4582
/* Initialize a compute ring's MQD.  Unlike the KIQ path, the MQD is
 * only built (not committed) here — mapping to hardware is done later
 * by the KIQ via gfx_v8_0_kiq_kcq_enable().  On GPU reset the MQD is
 * restored from backup; on suspend/resume the existing MQD is reused
 * and only the ring is cleared.  Always returns 0.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_gpu_reset && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a CPU copy so a later GPU reset can restore it */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4613
/* Program the MEC doorbell aperture (KIQ..mec_ring7) on ASICs newer
 * than Tonga, then enable CP doorbell processing for all ASICs. */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4623
4624 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4625 {
4626         struct amdgpu_ring *ring;
4627         int r;
4628
4629         ring = &adev->gfx.kiq.ring;
4630
4631         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4632         if (unlikely(r != 0))
4633                 return r;
4634
4635         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4636         if (unlikely(r != 0))
4637                 return r;
4638
4639         gfx_v8_0_kiq_init_queue(ring);
4640         amdgpu_bo_kunmap(ring->mqd_obj);
4641         ring->mqd_ptr = NULL;
4642         amdgpu_bo_unreserve(ring->mqd_obj);
4643         ring->sched.ready = true;
4644         return 0;
4645 }
4646
4647 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4648 {
4649         struct amdgpu_ring *ring = NULL;
4650         int r = 0, i;
4651
4652         gfx_v8_0_cp_compute_enable(adev, true);
4653
4654         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4655                 ring = &adev->gfx.compute_ring[i];
4656
4657                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4658                 if (unlikely(r != 0))
4659                         goto done;
4660                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4661                 if (!r) {
4662                         r = gfx_v8_0_kcq_init_queue(ring);
4663                         amdgpu_bo_kunmap(ring->mqd_obj);
4664                         ring->mqd_ptr = NULL;
4665                 }
4666                 amdgpu_bo_unreserve(ring->mqd_obj);
4667                 if (r)
4668                         goto done;
4669         }
4670
4671         gfx_v8_0_set_mec_doorbell_range(adev);
4672
4673         r = gfx_v8_0_kiq_kcq_enable(adev);
4674         if (r)
4675                 goto done;
4676
4677 done:
4678         return r;
4679 }
4680
/* Ring-test gfx ring 0, the KIQ ring, and every compute ring.  A gfx
 * or KIQ failure aborts with its error; compute-ring results are not
 * propagated.  NOTE(review): the ignored compute return value appears
 * intentional (best-effort — init proceeds with whatever compute rings
 * pass), but confirm against amdgpu_ring_test_helper() semantics. */
static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *ring;

	/* collect all the ring_tests here, gfx, kiq, compute */
	ring = &adev->gfx.gfx_ring[0];
	r = amdgpu_ring_test_helper(ring);
	if (r)
		return r;

	ring = &adev->gfx.kiq.ring;
	r = amdgpu_ring_test_helper(ring);
	if (r)
		return r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		amdgpu_ring_test_helper(ring);
	}

	return 0;
}
4704
/* Resume the whole command processor: KIQ first (it maps the KCQs),
 * then the gfx ring, then the compute queues, then ring-test all of
 * them.  GUI-idle interrupts are masked around the sequence on dGPUs.
 *
 * Returns 0 on success or the first failing step's error.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kcq_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_test_all_rings(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
4732
/* Enable or halt both the gfx and compute command processors. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4738
/* IP-block hw_init hook: program golden registers and GFX constants,
 * pin the clear-state buffer in VRAM, resume the RLC, then the CP.
 *
 * Returns 0 on success or the first failing step's error.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_constants_init(adev);

	r = gfx_v8_0_csb_vram_pin(adev);
	if (r)
		return r;

	r = adev->gfx.rlc.funcs->resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}
4759
4760 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4761 {
4762         int r, i;
4763         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4764
4765         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4766         if (r)
4767                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4768
4769         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4770                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4771
4772                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4773                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4774                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4775                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4776                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4777                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4778                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4779                 amdgpu_ring_write(kiq_ring, 0);
4780                 amdgpu_ring_write(kiq_ring, 0);
4781                 amdgpu_ring_write(kiq_ring, 0);
4782         }
4783         r = amdgpu_ring_test_helper(kiq_ring);
4784         if (r)
4785                 DRM_ERROR("KCQ disable failed\n");
4786
4787         return r;
4788 }
4789
4790 static bool gfx_v8_0_is_idle(void *handle)
4791 {
4792         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4793
4794         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4795                 || RREG32(mmGRBM_STATUS2) != 0x8)
4796                 return false;
4797         else
4798                 return true;
4799 }
4800
4801 static bool gfx_v8_0_rlc_is_idle(void *handle)
4802 {
4803         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4804
4805         if (RREG32(mmGRBM_STATUS2) != 0x8)
4806                 return false;
4807         else
4808                 return true;
4809 }
4810
4811 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4812 {
4813         unsigned int i;
4814         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4815
4816         for (i = 0; i < adev->usec_timeout; i++) {
4817                 if (gfx_v8_0_rlc_is_idle(handle))
4818                         return 0;
4819
4820                 udelay(1);
4821         }
4822         return -ETIMEDOUT;
4823 }
4824
4825 static int gfx_v8_0_wait_for_idle(void *handle)
4826 {
4827         unsigned int i;
4828         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4829
4830         for (i = 0; i < adev->usec_timeout; i++) {
4831                 if (gfx_v8_0_is_idle(handle))
4832                         return 0;
4833
4834                 udelay(1);
4835         }
4836         return -ETIMEDOUT;
4837 }
4838
/*
 * gfx_v8_0_hw_fini - tear down the GFX block (shutdown/suspend path)
 *
 * Drops the interrupt references taken in late_init, unmaps the compute
 * queues, then (bare metal only) halts the CP and RLC once they are idle
 * and unpins the clear-state buffer.  Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

	/* disable KCQ to avoid CPC touch memory not valid anymore */
	gfx_v8_0_kcq_disable(adev);

	/* under SRIOV the host owns CP/RLC teardown; stop here */
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	/* halt CP/RLC inside RLC safe mode, but only once each is idle */
	amdgpu_gfx_rlc_enter_safe_mode(adev);
	if (!gfx_v8_0_wait_for_idle(adev))
		gfx_v8_0_cp_enable(adev, false);
	else
		pr_err("cp is busy, skip halt cp\n");
	if (!gfx_v8_0_wait_for_rlc_idle(adev))
		adev->gfx.rlc.funcs->stop(adev);
	else
		pr_err("rlc is busy, skip halt rlc\n");
	amdgpu_gfx_rlc_exit_safe_mode(adev);

	gfx_v8_0_csb_vram_unpin(adev);

	return 0;
}
4872
/* Suspend is a full hardware teardown; reuse the hw_fini path. */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_fini(adev);
}
4877
/* Resume is a full hardware bring-up; reuse the hw_init path. */
static int gfx_v8_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_init(adev);
}
4882
/*
 * gfx_v8_0_check_soft_reset - determine whether a GFX soft reset is needed
 *
 * Inspects GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS for busy engines and
 * accumulates the matching soft-reset bits.  The computed masks are cached
 * in adev->gfx.grbm_soft_reset / adev->gfx.srbm_soft_reset for use by the
 * pre/soft/post reset callbacks.  Returns true when any bit was set.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS: any busy graphics engine -> reset CP + GFX + GRBM */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: busy RLC -> reset RLC */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any busy CP micro-engine (fetcher/compute/gfx) -> reset all three */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS: pending GRBM requests or a busy semaphore block */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* cache the masks for the subsequent reset callbacks */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
4944
4945 static int gfx_v8_0_pre_soft_reset(void *handle)
4946 {
4947         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4948         u32 grbm_soft_reset = 0;
4949
4950         if ((!adev->gfx.grbm_soft_reset) &&
4951             (!adev->gfx.srbm_soft_reset))
4952                 return 0;
4953
4954         grbm_soft_reset = adev->gfx.grbm_soft_reset;
4955
4956         /* stop the rlc */
4957         adev->gfx.rlc.funcs->stop(adev);
4958
4959         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4960             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
4961                 /* Disable GFX parsing/prefetching */
4962                 gfx_v8_0_cp_gfx_enable(adev, false);
4963
4964         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4965             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
4966             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
4967             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
4968                 int i;
4969
4970                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4971                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4972
4973                         mutex_lock(&adev->srbm_mutex);
4974                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4975                         gfx_v8_0_deactivate_hqd(adev, 2);
4976                         vi_srbm_select(adev, 0, 0, 0, 0);
4977                         mutex_unlock(&adev->srbm_mutex);
4978                 }
4979                 /* Disable MEC parsing/prefetching */
4980                 gfx_v8_0_cp_compute_enable(adev, false);
4981         }
4982
4983        return 0;
4984 }
4985
/*
 * gfx_v8_0_soft_reset - pulse the GFX soft-reset bits
 *
 * Stalls the GFX interface in the memory controller, asserts the cached
 * GRBM/SRBM soft-reset bits for ~50us each, clears them, then releases
 * the GMC stall and lets things settle.  Always returns 0; no-op when
 * check_soft_reset cached no reset bits.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall + clear the GFX path in the memory controller while resetting */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		/* assert the GRBM reset bits */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		/* readback after write - presumably flushes the posted write */
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		/* de-assert */
		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* assert the SRBM reset bits */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		/* de-assert */
		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GMC stall */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5047
/*
 * gfx_v8_0_post_soft_reset - bring the GFX block back up after a soft reset
 *
 * Re-deactivates any stale compute HQDs and resumes the KIQ/KCQ when a CP
 * micro-engine was reset, resumes the GFX CP when CP/GFX was reset, then
 * tests all rings and restarts the RLC.  Always returns 0; no-op when
 * check_soft_reset cached no reset bits.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* deactivate each compute queue's HQD under srbm_mutex */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
		gfx_v8_0_kcq_resume(adev);
	}

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	gfx_v8_0_cp_test_all_rings(adev);

	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
5088
5089 /**
5090  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5091  *
5092  * @adev: amdgpu_device pointer
5093  *
5094  * Fetches a GPU clock counter snapshot.
5095  * Returns the 64 bit clock counter snapshot.
5096  */
5097 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5098 {
5099         uint64_t clock;
5100
5101         mutex_lock(&adev->gfx.gpu_clock_mutex);
5102         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5103         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5104                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5105         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5106         return clock;
5107 }
5108
5109 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5110                                           uint32_t vmid,
5111                                           uint32_t gds_base, uint32_t gds_size,
5112                                           uint32_t gws_base, uint32_t gws_size,
5113                                           uint32_t oa_base, uint32_t oa_size)
5114 {
5115         /* GDS Base */
5116         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5117         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5118                                 WRITE_DATA_DST_SEL(0)));
5119         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5120         amdgpu_ring_write(ring, 0);
5121         amdgpu_ring_write(ring, gds_base);
5122
5123         /* GDS Size */
5124         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5125         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5126                                 WRITE_DATA_DST_SEL(0)));
5127         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5128         amdgpu_ring_write(ring, 0);
5129         amdgpu_ring_write(ring, gds_size);
5130
5131         /* GWS */
5132         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5133         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5134                                 WRITE_DATA_DST_SEL(0)));
5135         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5136         amdgpu_ring_write(ring, 0);
5137         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5138
5139         /* OA */
5140         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5141         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5142                                 WRITE_DATA_DST_SEL(0)));
5143         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5144         amdgpu_ring_write(ring, 0);
5145         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5146 }
5147
5148 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5149 {
5150         WREG32(mmSQ_IND_INDEX,
5151                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5152                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5153                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5154                 (SQ_IND_INDEX__FORCE_READ_MASK));
5155         return RREG32(mmSQ_IND_DATA);
5156 }
5157
5158 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5159                            uint32_t wave, uint32_t thread,
5160                            uint32_t regno, uint32_t num, uint32_t *out)
5161 {
5162         WREG32(mmSQ_IND_INDEX,
5163                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5164                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5165                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5166                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5167                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5168                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5169         while (num--)
5170                 *(out++) = RREG32(mmSQ_IND_DATA);
5171 }
5172
5173 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5174 {
5175         /* type 0 wave data */
5176         dst[(*no_fields)++] = 0;
5177         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5178         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5179         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5180         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5181         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5182         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5183         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5184         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5185         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5186         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5187         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5188         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5189         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5190         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5191         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5192         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5193         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5194         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5195 }
5196
5197 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5198                                      uint32_t wave, uint32_t start,
5199                                      uint32_t size, uint32_t *dst)
5200 {
5201         wave_read_regs(
5202                 adev, simd, wave, 0,
5203                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5204 }
5205
5206
/* GFX helper callbacks: clock counter, SE/SH selection and wave debug reads. */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5214
/*
 * gfx_v8_0_early_init - set ring counts and install the dispatch tables
 *
 * Fills in the gfx ring counts and the gfx/ring/irq/gds/rlc function
 * tables on @handle (an amdgpu_device).  Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5229
/*
 * gfx_v8_0_late_init - acquire GFX interrupts and run EDC workarounds
 *
 * Takes references on the privileged-register, privileged-instruction,
 * CP ECC and SQ interrupts and runs the EDC GPR workaround (which needs
 * the IB pool, hence late init).  Returns 0 on success or the first
 * error encountered.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
	if (r) {
		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
		return r;
	}

	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
	if (r) {
		DRM_ERROR(
			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
			r);
		return r;
	}

	return 0;
}
5264
5265 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5266                                                        bool enable)
5267 {
5268         if (((adev->asic_type == CHIP_POLARIS11) ||
5269             (adev->asic_type == CHIP_POLARIS12) ||
5270             (adev->asic_type == CHIP_VEGAM)) &&
5271             adev->powerplay.pp_funcs->set_powergating_by_smu)
5272                 /* Send msg to SMU via Powerplay */
5273                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5274
5275         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5276 }
5277
5278 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5279                                                         bool enable)
5280 {
5281         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5282 }
5283
5284 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5285                 bool enable)
5286 {
5287         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5288 }
5289
5290 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5291                                           bool enable)
5292 {
5293         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5294 }
5295
5296 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5297                                                 bool enable)
5298 {
5299         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5300
5301         /* Read any GFX register to wake up GFX. */
5302         if (!enable)
5303                 RREG32(mmDB_RENDER_CONTROL);
5304 }
5305
5306 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5307                                           bool enable)
5308 {
5309         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5310                 cz_enable_gfx_cg_power_gating(adev, true);
5311                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5312                         cz_enable_gfx_pipeline_power_gating(adev, true);
5313         } else {
5314                 cz_enable_gfx_cg_power_gating(adev, false);
5315                 cz_enable_gfx_pipeline_power_gating(adev, false);
5316         }
5317 }
5318
/*
 * gfx_v8_0_set_powergating_state - apply a power-gating state to GFX
 *
 * Translates @state into the per-asic power-gating knobs: SCK slow-down,
 * CP power gating and coarse/static/dynamic/quick CU gating as supported
 * by adev->pg_flags.  The register programming is bracketed by RLC safe
 * mode when any relevant PG feature is supported.  No-op under SRIOV.
 * Always returns 0.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_enter_safe_mode(adev);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down tracks RLC_SMU_HS support */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		/* static (SMG) and dynamic (DMG) per-CU gating */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_exit_safe_mode(adev);
	return 0;
}
5389
/*
 * gfx_v8_0_get_clockgating_state - report currently-enabled GFX CG features
 *
 * Reads back the clock-gating control registers and ORs the matching
 * AMD_CG_SUPPORT_GFX_* bits into @flags.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	/* NOTE(review): under SRIOV *flags is cleared but the register
	 * reads below still execute - confirm whether an early return
	 * was intended here. */
	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS (implies MGLS) */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS (implies MGLS) */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5431
/*
 * gfx_v8_0_send_serdes_cmd - broadcast an RLC serdes command
 *
 * Selects all SEs/SHs, targets every CU and non-CU master, then writes
 * RLC_SERDES_WR_CTRL with @cmd as the BPM data and @reg_addr as the
 * register address, clearing all command/select bits first.  Stoney
 * keeps its existing BPM_DATA/REG_ADDR bits; other parts clear them too.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to all shader engines / shader arrays */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* install the command, target register and broadcast BPM address */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5472
/* RLC safe-mode message protocol (RLC_GPR_REG2 layout): bit 0 is the
 * request flag, bits 1-4 carry the enter/exit message. */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5479
5480 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5481 {
5482         uint32_t rlc_setting;
5483
5484         rlc_setting = RREG32(mmRLC_CNTL);
5485         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5486                 return false;
5487
5488         return true;
5489 }
5490
/*
 * gfx_v8_0_set_safe_mode - request RLC safe-mode entry
 *
 * Writes a command word (CMD bit + message 1) to RLC_SAFE_MODE, then
 * polls (bounded by adev->usec_timeout) until GFX clock and power are
 * both reported on in RLC_GPM_STAT and the CMD bit has cleared.
 */
static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;
	/* NOTE(review): the command word is seeded from RLC_CNTL but written
	 * to RLC_SAFE_MODE - appears deliberate, but worth confirming. */
	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
	WREG32(mmRLC_SAFE_MODE, data);

	/* wait for RLC_SAFE_MODE */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPM_STAT) &
		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
			break;
		udelay(1);
	}
	/* the RLC acknowledges by clearing the CMD bit */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5517
/*
 * gfx_v8_0_unset_safe_mode - request RLC safe-mode exit
 *
 * Writes the CMD bit with a zero message to RLC_SAFE_MODE and waits
 * (bounded by adev->usec_timeout) for the RLC to acknowledge by
 * clearing the CMD bit.
 */
static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	/* NOTE(review): seeded from RLC_CNTL, written to RLC_SAFE_MODE -
	 * mirrors set_safe_mode above; confirm this is intended. */
	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	WREG32(mmRLC_SAFE_MODE, data);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5534
/* RLC callback table shared by all GFX v8 ASICs handled by this file. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
	.set_safe_mode = gfx_v8_0_set_safe_mode,
	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
	.init = gfx_v8_0_rlc_init,
	.get_csb_size = gfx_v8_0_get_csb_size,
	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
	.resume = gfx_v8_0_rlc_resume,
	.stop = gfx_v8_0_rlc_stop,
	.reset = gfx_v8_0_rlc_reset,
	.start = gfx_v8_0_rlc_start
};
5548
/*
 * Enable or disable medium-grain clock gating (MGCG) together with the
 * related memory light-sleep (RLC/CP MGLS) and tree-shade (CGTS) features,
 * honouring the per-ASIC capability bits in adev->cg_flags.  The numbered
 * steps are order-sensitive; the whole sequence runs with the RLC held in
 * safe mode so it does not touch the same registers concurrently.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override bit set; dGPUs clear it too */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		/* avoid a redundant register write when nothing changed */
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* CGTS LS only when both MGLS and CGTS_LS are supported */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5652
/*
 * Enable or disable coarse-grain clock gating (CGCG) and, when supported,
 * coarse-grain light sleep (CGLS).  The BPM serdes commands and override
 * writes are order-sensitive; the RLC is held in safe mode throughout.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			/* clear the CGLS override as well */
			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5745 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5746                                             bool enable)
5747 {
5748         if (enable) {
5749                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5750                  * ===  MGCG + MGLS + TS(CG/LS) ===
5751                  */
5752                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5753                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5754         } else {
5755                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5756                  * ===  CGCG + CGLS ===
5757                  */
5758                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5759                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5760         }
5761         return 0;
5762 }
5763
5764 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5765                                           enum amd_clockgating_state state)
5766 {
5767         uint32_t msg_id, pp_state = 0;
5768         uint32_t pp_support_state = 0;
5769
5770         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5771                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5772                         pp_support_state = PP_STATE_SUPPORT_LS;
5773                         pp_state = PP_STATE_LS;
5774                 }
5775                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5776                         pp_support_state |= PP_STATE_SUPPORT_CG;
5777                         pp_state |= PP_STATE_CG;
5778                 }
5779                 if (state == AMD_CG_STATE_UNGATE)
5780                         pp_state = 0;
5781
5782                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5783                                 PP_BLOCK_GFX_CG,
5784                                 pp_support_state,
5785                                 pp_state);
5786                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5787                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5788         }
5789
5790         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5791                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5792                         pp_support_state = PP_STATE_SUPPORT_LS;
5793                         pp_state = PP_STATE_LS;
5794                 }
5795
5796                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5797                         pp_support_state |= PP_STATE_SUPPORT_CG;
5798                         pp_state |= PP_STATE_CG;
5799                 }
5800
5801                 if (state == AMD_CG_STATE_UNGATE)
5802                         pp_state = 0;
5803
5804                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5805                                 PP_BLOCK_GFX_MG,
5806                                 pp_support_state,
5807                                 pp_state);
5808                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5809                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5810         }
5811
5812         return 0;
5813 }
5814
5815 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5816                                           enum amd_clockgating_state state)
5817 {
5818
5819         uint32_t msg_id, pp_state = 0;
5820         uint32_t pp_support_state = 0;
5821
5822         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5823                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5824                         pp_support_state = PP_STATE_SUPPORT_LS;
5825                         pp_state = PP_STATE_LS;
5826                 }
5827                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5828                         pp_support_state |= PP_STATE_SUPPORT_CG;
5829                         pp_state |= PP_STATE_CG;
5830                 }
5831                 if (state == AMD_CG_STATE_UNGATE)
5832                         pp_state = 0;
5833
5834                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5835                                 PP_BLOCK_GFX_CG,
5836                                 pp_support_state,
5837                                 pp_state);
5838                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5839                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5840         }
5841
5842         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5843                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5844                         pp_support_state = PP_STATE_SUPPORT_LS;
5845                         pp_state = PP_STATE_LS;
5846                 }
5847                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5848                         pp_support_state |= PP_STATE_SUPPORT_CG;
5849                         pp_state |= PP_STATE_CG;
5850                 }
5851                 if (state == AMD_CG_STATE_UNGATE)
5852                         pp_state = 0;
5853
5854                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5855                                 PP_BLOCK_GFX_3D,
5856                                 pp_support_state,
5857                                 pp_state);
5858                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5859                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5860         }
5861
5862         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5863                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5864                         pp_support_state = PP_STATE_SUPPORT_LS;
5865                         pp_state = PP_STATE_LS;
5866                 }
5867
5868                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5869                         pp_support_state |= PP_STATE_SUPPORT_CG;
5870                         pp_state |= PP_STATE_CG;
5871                 }
5872
5873                 if (state == AMD_CG_STATE_UNGATE)
5874                         pp_state = 0;
5875
5876                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5877                                 PP_BLOCK_GFX_MG,
5878                                 pp_support_state,
5879                                 pp_state);
5880                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5881                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5882         }
5883
5884         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5885                 pp_support_state = PP_STATE_SUPPORT_LS;
5886
5887                 if (state == AMD_CG_STATE_UNGATE)
5888                         pp_state = 0;
5889                 else
5890                         pp_state = PP_STATE_LS;
5891
5892                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5893                                 PP_BLOCK_GFX_RLC,
5894                                 pp_support_state,
5895                                 pp_state);
5896                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5897                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5898         }
5899
5900         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5901                 pp_support_state = PP_STATE_SUPPORT_LS;
5902
5903                 if (state == AMD_CG_STATE_UNGATE)
5904                         pp_state = 0;
5905                 else
5906                         pp_state = PP_STATE_LS;
5907                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5908                         PP_BLOCK_GFX_CP,
5909                         pp_support_state,
5910                         pp_state);
5911                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5912                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5913         }
5914
5915         return 0;
5916 }
5917
5918 static int gfx_v8_0_set_clockgating_state(void *handle,
5919                                           enum amd_clockgating_state state)
5920 {
5921         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5922
5923         if (amdgpu_sriov_vf(adev))
5924                 return 0;
5925
5926         switch (adev->asic_type) {
5927         case CHIP_FIJI:
5928         case CHIP_CARRIZO:
5929         case CHIP_STONEY:
5930                 gfx_v8_0_update_gfx_clock_gating(adev,
5931                                                  state == AMD_CG_STATE_GATE);
5932                 break;
5933         case CHIP_TONGA:
5934                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5935                 break;
5936         case CHIP_POLARIS10:
5937         case CHIP_POLARIS11:
5938         case CHIP_POLARIS12:
5939         case CHIP_VEGAM:
5940                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5941                 break;
5942         default:
5943                 break;
5944         }
5945         return 0;
5946 }
5947
5948 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
5949 {
5950         return ring->adev->wb.wb[ring->rptr_offs];
5951 }
5952
5953 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5954 {
5955         struct amdgpu_device *adev = ring->adev;
5956
5957         if (ring->use_doorbell)
5958                 /* XXX check if swapping is necessary on BE */
5959                 return ring->adev->wb.wb[ring->wptr_offs];
5960         else
5961                 return RREG32(mmCP_RB0_WPTR);
5962 }
5963
5964 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5965 {
5966         struct amdgpu_device *adev = ring->adev;
5967
5968         if (ring->use_doorbell) {
5969                 /* XXX check if swapping is necessary on BE */
5970                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
5971                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
5972         } else {
5973                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5974                 (void)RREG32(mmCP_RB0_WPTR);
5975         }
5976 }
5977
/*
 * Emit a WAIT_REG_MEM packet that triggers an HDP flush and waits until
 * the flush-done bit for this ring's CP is set.  Compute/KIQ rings pick
 * the per-pipe CP bit for their ME; the gfx ring uses CP0 on the PFP.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* no flush-done bit defined for other MEs; skip */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6010
/*
 * Emit a VS partial flush followed by a VGT flush as two EVENT_WRITE
 * packets.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6021
/*
 * Emit an indirect buffer on the gfx ring.  CE IBs use the
 * INDIRECT_BUFFER_CONST opcode, DE IBs the plain INDIRECT_BUFFER opcode;
 * the control dword carries the IB length and the VMID (bits 24+).
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
					struct amdgpu_job *job,
					struct amdgpu_ib *ib,
					uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		/* PRE_ENB: presumably marks the IB preemptible under SR-IOV
		 * -- confirm against the PM4 spec.
		 */
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		/* DE metadata is emitted ahead of preemptible DE IBs only */
		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6053
/*
 * Emit an indirect buffer on a compute ring, optionally resetting the
 * GDS wave-ID counters first (see the comment below for why).
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_job *job,
					  struct amdgpu_ib *ib,
					  uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	/* Currently, there is a high possibility to get wave ID mismatch
	 * between ME and GDS, leading to a hw deadlock, because ME generates
	 * different wave IDs than the GDS expects. This situation happens
	 * randomly when at least 5 compute pipes use GDS ordered append.
	 * The wave IDs generated by ME are also wrong after suspend/resume.
	 * Those are probably bugs somewhere else in the kernel driver.
	 *
	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
	 * GDS to 0 for this ring (me/pipe).
	 */
	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6087
/*
 * Emit an EVENT_WRITE_EOP fence on the gfx ring: flush TC/TCL1 caches,
 * write the 32- or 64-bit sequence number to @addr and optionally raise
 * an interrupt, as selected by @flags.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL: 2 = 64-bit seq write, 1 = 32-bit; INT_SEL: 2 = irq on write */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6108
/*
 * Emit a WAIT_REG_MEM that blocks the ring until its own fence memory
 * reaches the latest synced sequence number.  Gfx rings wait on the PFP
 * so later fetches stall too; compute rings wait on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6125
/*
 * Emit a GPU VM TLB flush for @vmid: issue the invalidate through the
 * common GMC helper, then wait (with an always-true WAIT_REG_MEM read of
 * VM_INVALIDATE_REQUEST) for it to complete.  Gfx rings additionally sync
 * the PFP to the ME so the PFP does not prefetch through stale mappings.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6151
6152 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6153 {
6154         return ring->adev->wb.wb[ring->wptr_offs];
6155 }
6156
6157 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6158 {
6159         struct amdgpu_device *adev = ring->adev;
6160
6161         /* XXX check if swapping is necessary on BE */
6162         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6163         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6164 }
6165
6166 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6167                                            bool acquire)
6168 {
6169         struct amdgpu_device *adev = ring->adev;
6170         int pipe_num, tmp, reg;
6171         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6172
6173         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6174
6175         /* first me only has 2 entries, GFX and HP3D */
6176         if (ring->me > 0)
6177                 pipe_num -= 2;
6178
6179         reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6180         tmp = RREG32(reg);
6181         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6182         WREG32(reg, tmp);
6183 }
6184
/*
 * Track which pipes currently hold a resource reservation (one bit per
 * me/pipe in pipe_reserve_bitmap) and rescale every ring's SPI pipe
 * percentage accordingly: with no reservations outstanding all rings run
 * at full rate; otherwise only rings on reserved pipes keep full rate.
 * Serialized by pipe_reserve_mutex.
 */
static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
							   iring->me,
							   iring->pipe,
							   0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
							   iring->me,
							   iring->pipe,
							   0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
6234
/*
 * gfx_v8_0_hqd_set_priority - program HQD pipe/queue priority registers
 * @adev: amdgpu device pointer
 * @ring: compute ring whose hardware queue descriptor is reprogrammed
 * @acquire: true raises priority (pipe 0x2, queue 0xf), false resets both to 0
 *
 * The CP_HQD_* registers are per-queue, so the ring's me/pipe/queue must
 * be selected via SRBM (under srbm_mutex) before writing them; the
 * default selection is restored afterwards.
 */
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
	uint32_t queue_priority = acquire ? 0xf : 0x0;

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);

	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
/*
 * gfx_v8_0_ring_set_priority_compute - apply scheduler priority to hw
 * @ring: ring being reprioritized (no-op unless it is a compute ring)
 * @priority: drm scheduler priority; only DRM_SCHED_PRIORITY_HIGH_HW
 *            acquires the elevated hardware state
 *
 * Raises (or restores) the ring's HQD priority and then updates the
 * pipe reservation bookkeeping accordingly.
 */
static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
					       enum drm_sched_priority priority)
{
	struct amdgpu_device *adev = ring->adev;
	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;

	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
		return;

	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
}
6263
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring
 * @ring: compute ring to emit on
 * @addr: GPU address the fence value is written to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_* bits selecting 64-bit data and interrupt
 *
 * Emits a RELEASE_MEM packet that flushes/writes back the TC caches at
 * end of pipe, writes the fence value and optionally raises an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL: 2 = 64-bit payload, 1 = 32-bit; INT_SEL 2 = int + data */
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc); /* dword-aligned low bits */
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6284
/*
 * gfx_v8_0_ring_emit_fence_kiq - emit a fence on the KIQ ring
 * @ring: KIQ ring
 * @addr: writeback address for the 32-bit fence value
 * @seq: fence sequence number (lower 32 bits are written)
 * @flags: AMDGPU_FENCE_FLAG_* bits; 64-bit fences are not supported here
 *
 * Writes the fence value with a WRITE_DATA packet and, when requested,
 * pokes CPC_INT_STATUS to raise the interrupt.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6309
/* Emit a single SWITCH_BUFFER packet (payload 0) on the gfx ring. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6315
/*
 * gfx_v8_ring_emit_cntxcntl - emit a CONTEXT_CONTROL packet
 * @ring: gfx ring
 * @flags: AMDGPU_HAVE_CTX_SWITCH / AMDGPU_PREAMBLE_IB_PRESENT* bits
 *
 * Builds the load-control dword telling the CP which state groups to
 * reload (global config, SH regs, per-context state, CE RAM) depending
 * on whether a context switch or a preamble IB is pending.  Under
 * SR-IOV the CE metadata is emitted first.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6348
/*
 * gfx_v8_0_ring_emit_init_cond_exec - emit a COND_EXEC packet skeleton
 * @ring: gfx ring
 *
 * Emits a COND_EXEC packet whose DW-count operand is left as a dummy
 * (0x55aa55aa) and returns the ring offset of that dword so
 * gfx_v8_0_ring_emit_patch_cond_exec() can patch in the real count
 * once the conditional section has been fully emitted.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6361
/*
 * gfx_v8_0_ring_emit_patch_cond_exec - patch a pending COND_EXEC packet
 * @ring: gfx ring
 * @offset: ring offset of the dummy dword, as returned by
 *          gfx_v8_0_ring_emit_init_cond_exec()
 *
 * Replaces the 0x55aa55aa placeholder with the number of dwords emitted
 * since the COND_EXEC packet.  The else branch handles the case where
 * the write pointer has wrapped around the ring since the placeholder
 * was written.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr wrapped: count to end of ring plus distance from start */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6375
/*
 * gfx_v8_0_ring_emit_rreg - emit a register read via COPY_DATA
 * @ring: ring to emit on (used by the KIQ)
 * @reg: register offset to read
 *
 * Emits a COPY_DATA packet that copies the register value into the
 * writeback buffer slot at adev->virt.reg_val_offs, where the CPU can
 * pick it up after the packet retires.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6391
6392 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6393                                   uint32_t val)
6394 {
6395         uint32_t cmd;
6396
6397         switch (ring->funcs->type) {
6398         case AMDGPU_RING_TYPE_GFX:
6399                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6400                 break;
6401         case AMDGPU_RING_TYPE_KIQ:
6402                 cmd = 1 << 16; /* no inc addr */
6403                 break;
6404         default:
6405                 cmd = WR_CONFIRM;
6406                 break;
6407         }
6408
6409         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6410         amdgpu_ring_write(ring, cmd);
6411         amdgpu_ring_write(ring, reg);
6412         amdgpu_ring_write(ring, 0);
6413         amdgpu_ring_write(ring, val);
6414 }
6415
/*
 * gfx_v8_0_ring_soft_recovery - try to recover a hung ring
 * @ring: ring to recover
 * @vmid: VMID whose waves should be targeted
 *
 * Writes SQ_CMD with CHECK_VMID set so only waves belonging to @vmid
 * are affected.  CMD=0x03/MODE=0x01 appear to request a wave kill —
 * NOTE(review): confirm against the SQ_CMD field encoding for VI.
 */
static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t value = 0;

	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
	WREG32(mmSQ_CMD, value);
}
6427
/* Enable/disable the end-of-pipe timestamp interrupt on gfx ring 0. */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6434
/*
 * gfx_v8_0_set_compute_eop_interrupt_state - toggle a MEC pipe's EOP irq
 * @adev: amdgpu device pointer
 * @me: micro engine (only MEC1, i.e. me == 1, is handled here)
 * @pipe: pipe index within the MEC (0-3)
 * @state: enable or disable
 *
 * Performs a read-modify-write of the pipe's CP_ME1_PIPEn_INT_CNTL
 * timestamp-interrupt enable bit.
 */
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
6485
/* Enable/disable the privileged-register-access fault interrupt. */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6496
/* Enable/disable the privileged-instruction fault interrupt. */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6507
/*
 * gfx_v8_0_set_eop_interrupt_state - dispatch an EOP irq state change
 * @adev: amdgpu device pointer
 * @src: interrupt source (unused)
 * @type: AMDGPU_CP_IRQ_* identifying the ring/pipe
 * @state: enable or disable
 *
 * Routes the request either to the gfx ring helper or to the matching
 * MEC me/pipe helper.  Unknown types are silently ignored.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6546
/*
 * gfx_v8_0_set_cp_ecc_int_state - toggle CP ECC error interrupts
 * @adev: amdgpu device pointer
 * @source: interrupt source (unused)
 * @type: interrupt type (unused)
 * @state: enable or disable
 *
 * Applies the enable flag to the ECC-error interrupt bit in every CP
 * interrupt control register: the global/ring controls plus all MEC1
 * and MEC2 pipe controls.  Returns -EINVAL for any other state.
 */
static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
					 struct amdgpu_irq_src *source,
					 unsigned int type,
					 enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 0;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 1;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);

	return 0;
}
6591
/*
 * gfx_v8_0_set_sq_int_state - toggle SQ interrupt message delivery
 * @adev: amdgpu device pointer
 * @source: interrupt source (unused)
 * @type: interrupt type (unused)
 * @state: enable or disable
 *
 * Note the deliberate inversion: the register field is STALL, so
 * DISABLE writes 1 (stall/suppress messages) and ENABLE writes 0 —
 * presumably STALL=1 blocks SQ interrupt messages; confirm against the
 * SQ_INTERRUPT_MSG_CTRL register description.  Returns -EINVAL for any
 * other state.
 */
static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     unsigned int type,
				     enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 1;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 0;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
		     enable_flag);

	return 0;
}
6617
/*
 * gfx_v8_0_eop_irq - CP end-of-pipe interrupt handler
 * @adev: amdgpu device pointer
 * @source: interrupt source (unused)
 * @entry: IV ring entry; ring_id encodes me (bits 3:2), pipe (bits 1:0)
 *         and queue (bits 6:4)
 *
 * Runs fence processing on gfx ring 0 for me 0, or on the compute ring
 * matching the decoded me/pipe/queue for MEC interrupts.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead
			 * of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6649
6650 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6651                            struct amdgpu_iv_entry *entry)
6652 {
6653         u8 me_id, pipe_id, queue_id;
6654         struct amdgpu_ring *ring;
6655         int i;
6656
6657         me_id = (entry->ring_id & 0x0c) >> 2;
6658         pipe_id = (entry->ring_id & 0x03) >> 0;
6659         queue_id = (entry->ring_id & 0x70) >> 4;
6660
6661         switch (me_id) {
6662         case 0:
6663                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6664                 break;
6665         case 1:
6666         case 2:
6667                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6668                         ring = &adev->gfx.compute_ring[i];
6669                         if (ring->me == me_id && ring->pipe == pipe_id &&
6670                             ring->queue == queue_id)
6671                                 drm_sched_fault(&ring->sched);
6672                 }
6673                 break;
6674         }
6675 }
6676
/* Privileged-register fault handler: log and fault the offending ring. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}
6685
/* Privileged-instruction fault handler: log and fault the offending ring. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}
6694
/*
 * gfx_v8_0_cp_ecc_error_irq - CP EDC/ECC error interrupt handler
 * @adev: amdgpu device pointer (unused)
 * @source: interrupt source (unused)
 * @entry: IV ring entry (unused)
 *
 * Only logs the event; no recovery is attempted here.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* kernel log messages must be newline-terminated */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6702
6703 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6704 {
6705         u32 enc, se_id, sh_id, cu_id;
6706         char type[20];
6707         int sq_edc_source = -1;
6708
6709         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6710         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6711
6712         switch (enc) {
6713                 case 0:
6714                         DRM_INFO("SQ general purpose intr detected:"
6715                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6716                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6717                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6718                                         "wlt %d, thread_trace %d.\n",
6719                                         se_id,
6720                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6721                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6722                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6723                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6724                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6725                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6726                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6727                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6728                                         );
6729                         break;
6730                 case 1:
6731                 case 2:
6732
6733                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6734                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6735
6736                         /*
6737                          * This function can be called either directly from ISR
6738                          * or from BH in which case we can access SQ_EDC_INFO
6739                          * instance
6740                          */
6741                         if (in_task()) {
6742                                 mutex_lock(&adev->grbm_idx_mutex);
6743                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6744
6745                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6746
6747                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6748                                 mutex_unlock(&adev->grbm_idx_mutex);
6749                         }
6750
6751                         if (enc == 1)
6752                                 sprintf(type, "instruction intr");
6753                         else
6754                                 sprintf(type, "EDC/ECC error");
6755
6756                         DRM_INFO(
6757                                 "SQ %s detected: "
6758                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6759                                         "trap %s, sq_ed_info.source %s.\n",
6760                                         type, se_id, sh_id, cu_id,
6761                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6762                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6763                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6764                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6765                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6766                                 );
6767                         break;
6768                 default:
6769                         DRM_ERROR("SQ invalid encoding type\n.");
6770         }
6771 }
6772
/*
 * gfx_v8_0_sq_irq_work_func - bottom half for SQ interrupts
 * @work: the work_struct embedded in adev->gfx.sq_work; both
 *        container_of() casts below therefore resolve to the same object
 *
 * Runs in task context so gfx_v8_0_parse_sq_irq() can safely read
 * SQ_EDC_INFO (which needs grbm_idx_mutex).
 */
static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
{

	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
	struct sq_work *sq_work = container_of(work, struct sq_work, work);

	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
}
6781
6782 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6783                            struct amdgpu_irq_src *source,
6784                            struct amdgpu_iv_entry *entry)
6785 {
6786         unsigned ih_data = entry->src_data[0];
6787
6788         /*
6789          * Try to submit work so SQ_EDC_INFO can be accessed from
6790          * BH. If previous work submission hasn't finished yet
6791          * just print whatever info is possible directly from the ISR.
6792          */
6793         if (work_pending(&adev->gfx.sq_work.work)) {
6794                 gfx_v8_0_parse_sq_irq(adev, ih_data);
6795         } else {
6796                 adev->gfx.sq_work.ih_data = ih_data;
6797                 schedule_work(&adev->gfx.sq_work.work);
6798         }
6799
6800         return 0;
6801 }
6802
/* IP-block lifecycle callbacks for the GFX v8 hardware block. */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6823
/* Ring callbacks for the graphics (GFX) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /*	CE_META */
		31 + /*	DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
};
6868
/* Ring callbacks for the MEC compute rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6898
/* Ring callbacks for the kernel interface queue (KIQ). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6922
6923 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6924 {
6925         int i;
6926
6927         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6928
6929         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6930                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6931
6932         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6933                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6934 }
6935
/* CP end-of-pipe interrupt source: enable/disable + handler callbacks. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6940
/* Privileged register fault interrupt source callbacks. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6945
/* Privileged instruction fault interrupt source callbacks. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6950
/* CP ECC error interrupt source callbacks. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};
6955
/* SQ (shader sequencer) interrupt source callbacks. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};
6960
/*
 * Register the interrupt sources owned by the GFX v8 block: for each
 * source, the number of distinct interrupt types and its callback table.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	/* one EOP interrupt type per CP queue (AMDGPU_CP_IRQ_LAST entries) */
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}
6978
/*
 * Hook up the RLC helpers.  NOTE(review): despite the name, the shared
 * iceland_rlc_funcs table is installed unconditionally here for every
 * gfx v8 part, not only Iceland.
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
6983
/*
 * Initialize the GDS (global data share) parameters: the GDS size is read
 * back from hardware, while the GWS/OA sizes are fixed constants on gfx v8.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
}
6992
6993 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6994                                                  u32 bitmap)
6995 {
6996         u32 data;
6997
6998         if (!bitmap)
6999                 return;
7000
7001         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7002         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7003
7004         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7005 }
7006
7007 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7008 {
7009         u32 data, mask;
7010
7011         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7012                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7013
7014         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7015
7016         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7017 }
7018
/*
 * Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total
 * active CU count, the always-on (AO) CU mask and fixed per-CU limits.
 * Walks every shader engine / shader array under grbm_idx_mutex since it
 * reprograms GRBM SE/SH selection while reading the per-SH CU registers.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs cap the always-on CU count at 2; dGPUs allow a full SH */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	/* parse the amdgpu.disable_cu module parameter (up to 4 SEs x 2 SHs) */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			/* disable_masks only covers the first 4 SEs / 2 SHs */
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; the first ao_cu_num become AO CUs */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* packed ao_cu_mask has room for 2 SEs x 2 SHs x 8 bits */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* restore broadcast SE/SH selection */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
7074
/* IP block descriptor for GFX v8.0 parts; shares gfx_v8_0_ip_funcs with v8.1. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7083
/* IP block descriptor for GFX v8.1 parts; same callbacks, different minor. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7092
7093 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7094 {
7095         uint64_t ce_payload_addr;
7096         int cnt_ce;
7097         union {
7098                 struct vi_ce_ib_state regular;
7099                 struct vi_ce_ib_state_chained_ib chained;
7100         } ce_payload = {};
7101
7102         if (ring->adev->virt.chained_ib_support) {
7103                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7104                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7105                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7106         } else {
7107                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7108                         offsetof(struct vi_gfx_meta_data, ce_payload);
7109                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7110         }
7111
7112         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7113         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7114                                 WRITE_DATA_DST_SEL(8) |
7115                                 WR_CONFIRM) |
7116                                 WRITE_DATA_CACHE_POLICY(0));
7117         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7118         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7119         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7120 }
7121
/*
 * Emit a WRITE_DATA packet that initializes the DE payload in the
 * per-ring CSA.  Unlike the CE payload, the DE payload carries the GDS
 * backup address (CSA base + 4096) before being written out; the payload
 * layout and packet size depend on chained-IB support.
 */
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	/* GDS backup area lives one page past the CSA base */
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}