2 * Copyright 2014 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
31 #include "amdgpu_gfx.h"
33 #include "vi_structs.h"
35 #include "amdgpu_ucode.h"
36 #include "amdgpu_atombios.h"
37 #include "atombios_i2c.h"
38 #include "clearstate_vi.h"
40 #include "gmc/gmc_8_2_d.h"
41 #include "gmc/gmc_8_2_sh_mask.h"
43 #include "oss/oss_3_0_d.h"
44 #include "oss/oss_3_0_sh_mask.h"
46 #include "bif/bif_5_0_d.h"
47 #include "bif/bif_5_0_sh_mask.h"
48 #include "gca/gfx_8_0_d.h"
49 #include "gca/gfx_8_0_enum.h"
50 #include "gca/gfx_8_0_sh_mask.h"
52 #include "dce/dce_10_0_d.h"
53 #include "dce/dce_10_0_sh_mask.h"
55 #include "smu/smu_7_1_3_d.h"
57 #include "ivsrcid/ivsrcid_vislands30.h"
/* Number of GFX rings exposed by GFX8 and the per-queue HPD
 * (hardware queue descriptor) size in bytes used by the MEC.
 */
#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	4096

/* Golden GB_ADDR_CONFIG (tiling/address configuration) values per ASIC. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

/* Helpers to compose GB_TILE_MODEn / GB_MACROTILE_MODEn register values. */
#define ARRAY_MODE(x)		((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)		((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)		((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)	((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)		((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)		((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)		((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)	((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)		((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-block clock-gating override bits in RLC_CGTT_MGCG_OVERRIDE. */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* Commands for the RLC serdes BPM interface: set vs. clear a register. */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0
/* BPM register addresses reachable through the RLC serdes interface. */
enum bpm_reg {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX	/* NOTE(review): terminator reconstructed -- confirm */
};

/* Number of entries in the RLC "direct register list" format. */
#define RLC_FormatDirectRegListLength        14
102 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
104 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
105 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
106 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
107 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
108 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
109 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
110 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
111 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
112 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
113 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
114 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
115 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
116 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
117 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
118 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
119 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
122 static const u32 golden_settings_tonga_a11[] =
124 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
125 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
126 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
127 mmGB_GPU_ID, 0x0000000f, 0x00000000,
128 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
129 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
130 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
131 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
132 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
133 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
134 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
135 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
136 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
137 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
138 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
139 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
142 static const u32 tonga_golden_common_all[] =
144 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
145 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
146 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
147 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
148 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
149 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
150 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
151 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
154 static const u32 tonga_mgcg_cgcg_init[] =
156 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
157 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
158 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
159 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
160 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
161 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
162 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
163 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
164 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
165 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
166 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
167 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
168 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
169 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
170 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
171 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
172 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
173 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
174 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
175 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
176 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
177 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
178 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
179 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
180 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
181 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
182 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
183 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
184 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
185 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
186 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
187 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
188 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
189 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
190 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
191 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
192 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
193 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
194 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
195 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
196 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
197 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
198 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
199 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
200 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
201 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
202 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
203 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
204 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
205 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
206 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
207 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
208 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
209 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
210 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
211 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
212 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
213 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
214 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
215 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
216 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
217 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
218 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
219 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
220 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
221 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
222 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
223 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
224 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
225 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
226 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
227 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
228 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
229 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
230 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
233 static const u32 golden_settings_vegam_a11[] =
235 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
236 mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
237 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
238 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
239 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
240 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
241 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
242 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
243 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
244 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
245 mmSQ_CONFIG, 0x07f80000, 0x01180000,
246 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
247 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
248 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
249 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
250 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
251 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
254 static const u32 vegam_golden_common_all[] =
256 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
257 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
258 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
259 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
260 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
261 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
264 static const u32 golden_settings_polaris11_a11[] =
266 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
267 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
268 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
269 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
270 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
271 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
272 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
273 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
274 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
275 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
276 mmSQ_CONFIG, 0x07f80000, 0x01180000,
277 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
278 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
279 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
280 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
281 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
282 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
285 static const u32 polaris11_golden_common_all[] =
287 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
288 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
289 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
290 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
291 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
292 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
295 static const u32 golden_settings_polaris10_a11[] =
297 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
298 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
299 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
300 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
301 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
302 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
303 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
304 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
305 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
306 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
307 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
308 mmSQ_CONFIG, 0x07f80000, 0x07180000,
309 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
310 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
311 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
312 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
313 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
316 static const u32 polaris10_golden_common_all[] =
318 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
319 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
320 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
321 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
322 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
323 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
324 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
325 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
328 static const u32 fiji_golden_common_all[] =
330 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
331 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
332 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
333 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
334 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
335 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
336 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
337 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
338 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
339 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
342 static const u32 golden_settings_fiji_a10[] =
344 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
345 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
346 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
347 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
348 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
349 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
350 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
351 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
352 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
353 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
354 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
357 static const u32 fiji_mgcg_cgcg_init[] =
359 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
360 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
361 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
362 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
363 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
364 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
365 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
366 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
367 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
368 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
369 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
370 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
371 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
372 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
373 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
374 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
375 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
376 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
377 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
378 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
379 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
380 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
381 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
382 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
383 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
384 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
385 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
386 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
387 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
388 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
389 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
390 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
391 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
392 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
393 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
396 static const u32 golden_settings_iceland_a11[] =
398 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
399 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
400 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
401 mmGB_GPU_ID, 0x0000000f, 0x00000000,
402 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
403 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
404 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
405 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
406 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
407 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
408 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
409 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
410 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
411 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
412 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
413 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
416 static const u32 iceland_golden_common_all[] =
418 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
419 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
420 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
421 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
422 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
423 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
424 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
425 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
428 static const u32 iceland_mgcg_cgcg_init[] =
430 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
431 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
432 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
433 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
434 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
435 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
436 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
437 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
438 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
439 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
440 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
441 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
442 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
443 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
444 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
445 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
446 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
447 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
448 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
449 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
450 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
451 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
452 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
453 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
454 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
455 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
456 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
457 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
458 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
459 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
460 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
461 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
462 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
463 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
464 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
465 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
466 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
467 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
468 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
469 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
470 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
471 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
472 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
473 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
474 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
475 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
476 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
477 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
478 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
479 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
480 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
481 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
482 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
483 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
484 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
485 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
486 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
487 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
488 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
489 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
490 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
491 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
492 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
493 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
496 static const u32 cz_golden_settings_a11[] =
498 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
499 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
500 mmGB_GPU_ID, 0x0000000f, 0x00000000,
501 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
502 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
503 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
504 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
505 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
506 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
507 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
508 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
509 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
512 static const u32 cz_golden_common_all[] =
514 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
515 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
516 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
517 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
518 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
519 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
520 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
521 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
524 static const u32 cz_mgcg_cgcg_init[] =
526 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
527 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
528 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
529 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
530 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
531 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
532 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
533 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
534 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
535 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
536 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
537 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
538 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
539 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
540 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
541 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
542 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
543 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
544 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
545 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
546 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
547 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
548 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
549 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
550 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
551 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
552 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
553 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
554 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
555 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
556 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
557 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
558 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
559 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
560 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
561 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
562 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
563 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
564 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
565 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
566 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
567 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
568 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
569 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
570 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
571 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
572 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
573 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
574 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
575 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
576 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
577 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
578 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
579 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
580 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
581 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
582 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
583 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
584 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
585 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
586 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
587 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
588 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
589 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
590 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
591 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
592 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
593 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
594 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
595 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
596 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
597 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
598 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
599 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
600 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
603 static const u32 stoney_golden_settings_a11[] =
605 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
606 mmGB_GPU_ID, 0x0000000f, 0x00000000,
607 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
608 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
609 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
610 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
611 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
612 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
613 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
614 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
617 static const u32 stoney_golden_common_all[] =
619 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
620 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
621 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
622 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
623 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
624 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
625 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
626 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
629 static const u32 stoney_mgcg_cgcg_init[] =
631 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
632 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
633 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
634 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
635 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
/* Human-readable descriptions of the SQ EDC error-source codes, indexed
 * by the source field value reported by the hardware.
 */
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
649 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
650 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
651 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
652 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
653 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
654 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
655 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
656 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
658 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
660 switch (adev->asic_type) {
662 amdgpu_device_program_register_sequence(adev,
663 iceland_mgcg_cgcg_init,
664 ARRAY_SIZE(iceland_mgcg_cgcg_init));
665 amdgpu_device_program_register_sequence(adev,
666 golden_settings_iceland_a11,
667 ARRAY_SIZE(golden_settings_iceland_a11));
668 amdgpu_device_program_register_sequence(adev,
669 iceland_golden_common_all,
670 ARRAY_SIZE(iceland_golden_common_all));
673 amdgpu_device_program_register_sequence(adev,
675 ARRAY_SIZE(fiji_mgcg_cgcg_init));
676 amdgpu_device_program_register_sequence(adev,
677 golden_settings_fiji_a10,
678 ARRAY_SIZE(golden_settings_fiji_a10));
679 amdgpu_device_program_register_sequence(adev,
680 fiji_golden_common_all,
681 ARRAY_SIZE(fiji_golden_common_all));
685 amdgpu_device_program_register_sequence(adev,
686 tonga_mgcg_cgcg_init,
687 ARRAY_SIZE(tonga_mgcg_cgcg_init));
688 amdgpu_device_program_register_sequence(adev,
689 golden_settings_tonga_a11,
690 ARRAY_SIZE(golden_settings_tonga_a11));
691 amdgpu_device_program_register_sequence(adev,
692 tonga_golden_common_all,
693 ARRAY_SIZE(tonga_golden_common_all));
696 amdgpu_device_program_register_sequence(adev,
697 golden_settings_vegam_a11,
698 ARRAY_SIZE(golden_settings_vegam_a11));
699 amdgpu_device_program_register_sequence(adev,
700 vegam_golden_common_all,
701 ARRAY_SIZE(vegam_golden_common_all));
705 amdgpu_device_program_register_sequence(adev,
706 golden_settings_polaris11_a11,
707 ARRAY_SIZE(golden_settings_polaris11_a11));
708 amdgpu_device_program_register_sequence(adev,
709 polaris11_golden_common_all,
710 ARRAY_SIZE(polaris11_golden_common_all));
713 amdgpu_device_program_register_sequence(adev,
714 golden_settings_polaris10_a11,
715 ARRAY_SIZE(golden_settings_polaris10_a11));
716 amdgpu_device_program_register_sequence(adev,
717 polaris10_golden_common_all,
718 ARRAY_SIZE(polaris10_golden_common_all));
719 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
720 if (adev->pdev->revision == 0xc7 &&
721 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
722 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
723 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
724 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
725 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
729 amdgpu_device_program_register_sequence(adev,
731 ARRAY_SIZE(cz_mgcg_cgcg_init));
732 amdgpu_device_program_register_sequence(adev,
733 cz_golden_settings_a11,
734 ARRAY_SIZE(cz_golden_settings_a11));
735 amdgpu_device_program_register_sequence(adev,
736 cz_golden_common_all,
737 ARRAY_SIZE(cz_golden_common_all));
740 amdgpu_device_program_register_sequence(adev,
741 stoney_mgcg_cgcg_init,
742 ARRAY_SIZE(stoney_mgcg_cgcg_init));
743 amdgpu_device_program_register_sequence(adev,
744 stoney_golden_settings_a11,
745 ARRAY_SIZE(stoney_golden_settings_a11));
746 amdgpu_device_program_register_sequence(adev,
747 stoney_golden_common_all,
748 ARRAY_SIZE(stoney_golden_common_all));
755 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
757 adev->gfx.scratch.num_reg = 8;
758 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
759 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
/* Basic GFX ring sanity test: seed a scratch register with 0xCAFEDEAD,
 * submit a SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, then poll
 * until the value lands or adev->usec_timeout iterations elapse.
 * NOTE(review): error-check/return lines are elided in this excerpt. */
762 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
764 struct amdgpu_device *adev = ring->adev;
770 r = amdgpu_gfx_scratch_get(adev, &scratch);
/* Sentinel value so a stale read cannot be mistaken for success. */
774 WREG32(scratch, 0xCAFEDEAD);
775 r = amdgpu_ring_alloc(ring, 3);
777 goto error_free_scratch;
/* 3-dword packet: opcode, register offset, value to write. */
779 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
780 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
781 amdgpu_ring_write(ring, 0xDEADBEEF);
782 amdgpu_ring_commit(ring);
/* Busy-poll the scratch register for the GPU-side write. */
784 for (i = 0; i < adev->usec_timeout; i++) {
785 tmp = RREG32(scratch);
786 if (tmp == 0xDEADBEEF)
791 if (i >= adev->usec_timeout)
795 amdgpu_gfx_scratch_free(adev, scratch);
/* Indirect-buffer (IB) self test: allocate a writeback (WB) slot, seed it
 * with 0xCAFEDEAD, then schedule a small IB containing a WRITE_DATA packet
 * that stores 0xDEADBEEF to the slot's GPU address. Waits on the resulting
 * fence (up to @timeout) and checks the CPU-visible WB slot afterwards.
 * NOTE(review): error-check/return lines are elided in this excerpt. */
799 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
801 struct amdgpu_device *adev = ring->adev;
803 struct dma_fence *f = NULL;
810 r = amdgpu_device_wb_get(adev, &index);
/* WB slots are 32-bit; compute this slot's GPU address. */
814 gpu_addr = adev->wb.gpu_addr + (index * 4);
815 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
816 memset(&ib, 0, sizeof(ib));
817 r = amdgpu_ib_get(adev, NULL, 16, &ib);
/* WRITE_DATA: dst_sel=5 (memory) with write-confirm, 64-bit address. */
821 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
822 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
823 ib.ptr[2] = lower_32_bits(gpu_addr);
824 ib.ptr[3] = upper_32_bits(gpu_addr);
825 ib.ptr[4] = 0xDEADBEEF;
828 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
832 r = dma_fence_wait_timeout(f, false, timeout);
840 tmp = adev->wb.wb[index];
841 if (tmp == 0xDEADBEEF)
847 amdgpu_ib_free(adev, &ib, NULL);
850 amdgpu_device_wb_free(adev, index);
/* Release all GFX firmware images taken in gfx_v8_0_init_microcode() and
 * free the cached RLC register-list-format table. MEC2 firmware is only
 * released for ASICs that load it (not Stoney/Topaz); the pointer is
 * NULLed unconditionally either way. */
855 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
857 release_firmware(adev->gfx.pfp_fw);
858 adev->gfx.pfp_fw = NULL;
859 release_firmware(adev->gfx.me_fw);
860 adev->gfx.me_fw = NULL;
861 release_firmware(adev->gfx.ce_fw);
862 adev->gfx.ce_fw = NULL;
863 release_firmware(adev->gfx.rlc_fw);
864 adev->gfx.rlc_fw = NULL;
865 release_firmware(adev->gfx.mec_fw);
866 adev->gfx.mec_fw = NULL;
867 if ((adev->asic_type != CHIP_STONEY) &&
868 (adev->asic_type != CHIP_TOPAZ))
869 release_firmware(adev->gfx.mec2_fw);
870 adev->gfx.mec2_fw = NULL;
/* register_list_format was kmalloc'd in gfx_v8_0_init_microcode(). */
872 kfree(adev->gfx.rlc.register_list_format);
/* Load and validate all GFX firmware images (PFP, ME, CE, RLC, MEC, and
 * MEC2 where applicable) for the detected ASIC, parse their headers into
 * adev->gfx.*, copy the RLC register lists out of the RLC image, and
 * register each image in adev->firmware.ucode[] with page-aligned size
 * accounting.
 * NOTE(review): firmware file names have been scrubbed ("DEBLOBBED") in
 * this tree, and many error-check lines are elided from this excerpt. */
875 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
877 const char *chip_name;
880 struct amdgpu_firmware_info *info = NULL;
881 const struct common_firmware_header *header = NULL;
882 const struct gfx_firmware_header_v1_0 *cp_hdr;
883 const struct rlc_firmware_header_v2_0 *rlc_hdr;
884 unsigned int *tmp = NULL, i;
/* Pick the firmware name prefix from the ASIC type. */
888 switch (adev->asic_type) {
896 chip_name = "carrizo";
902 chip_name = "stoney";
905 chip_name = "polaris10";
908 chip_name = "polaris11";
911 chip_name = "polaris12";
/* --- PFP firmware. Polaris parts try an alternate file name first and
 * fall back to the plain name on -ENOENT. --- */
920 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
921 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
922 err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
923 if (err == -ENOENT) {
924 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
925 err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
928 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
929 err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
933 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
936 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
937 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
938 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* --- ME firmware (same Polaris fallback pattern). --- */
940 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
941 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
942 err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
943 if (err == -ENOENT) {
944 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
945 err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
948 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
949 err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
953 err = amdgpu_ucode_validate(adev->gfx.me_fw);
956 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
957 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
959 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* --- CE firmware (same Polaris fallback pattern). --- */
961 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
962 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
963 err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
964 if (err == -ENOENT) {
965 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
966 err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
969 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
970 err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
974 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
977 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
978 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
979 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
982 * Support for MCBP/Virtualization in combination with chained IBs is
983 * formal released on feature version #46
985 if (adev->gfx.ce_feature_version >= 46 &&
986 adev->gfx.pfp_feature_version >= 46) {
987 adev->virt.chained_ib_support = true;
988 DRM_INFO("Chained IB support enabled!\n");
990 adev->virt.chained_ib_support = false;
/* --- RLC firmware: header carries the save/restore layout plus two
 * register lists that are copied into one kmalloc'd buffer below. --- */
992 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
993 err = reject_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
996 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
997 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
998 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
999 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1001 adev->gfx.rlc.save_and_restore_offset =
1002 le32_to_cpu(rlc_hdr->save_and_restore_offset);
1003 adev->gfx.rlc.clear_state_descriptor_offset =
1004 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1005 adev->gfx.rlc.avail_scratch_ram_locations =
1006 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1007 adev->gfx.rlc.reg_restore_list_size =
1008 le32_to_cpu(rlc_hdr->reg_restore_list_size);
1009 adev->gfx.rlc.reg_list_format_start =
1010 le32_to_cpu(rlc_hdr->reg_list_format_start);
1011 adev->gfx.rlc.reg_list_format_separate_start =
1012 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1013 adev->gfx.rlc.starting_offsets_start =
1014 le32_to_cpu(rlc_hdr->starting_offsets_start);
1015 adev->gfx.rlc.reg_list_format_size_bytes =
1016 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1017 adev->gfx.rlc.reg_list_size_bytes =
1018 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
/* Single allocation holding both lists; register_restore points at the
 * tail of the format list (freed in gfx_v8_0_free_microcode()). */
1020 adev->gfx.rlc.register_list_format =
1021 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1022 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1024 if (!adev->gfx.rlc.register_list_format) {
/* Copy the format list out of the firmware image, LE -> CPU order. */
1029 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1030 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1031 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1032 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1034 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1036 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1037 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1038 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1039 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
/* --- MEC firmware (same Polaris fallback pattern). --- */
1041 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1042 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1043 err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1044 if (err == -ENOENT) {
1045 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1046 err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1049 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1050 err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1054 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1057 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1058 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1059 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* --- MEC2 firmware: skipped on Stoney/Topaz (mirrors free_microcode). --- */
1061 if ((adev->asic_type != CHIP_STONEY) &&
1062 (adev->asic_type != CHIP_TOPAZ)) {
1063 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1064 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1065 err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1066 if (err == -ENOENT) {
1067 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1068 err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1071 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1072 err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1075 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1078 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1079 adev->gfx.mec2_fw->data;
1080 adev->gfx.mec2_fw_version =
1081 le32_to_cpu(cp_hdr->header.ucode_version);
1082 adev->gfx.mec2_feature_version =
1083 le32_to_cpu(cp_hdr->ucode_feature_version);
1086 adev->gfx.mec2_fw = NULL;
/* --- Register every image for the PSP/SMU loader, rounding each size
 * up to a whole page for the firmware-size bookkeeping. --- */
1090 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1091 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1092 info->fw = adev->gfx.pfp_fw;
1093 header = (const struct common_firmware_header *)info->fw->data;
1094 adev->firmware.fw_size +=
1095 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1097 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1098 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1099 info->fw = adev->gfx.me_fw;
1100 header = (const struct common_firmware_header *)info->fw->data;
1101 adev->firmware.fw_size +=
1102 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1104 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1105 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1106 info->fw = adev->gfx.ce_fw;
1107 header = (const struct common_firmware_header *)info->fw->data;
1108 adev->firmware.fw_size +=
1109 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1111 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1112 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1113 info->fw = adev->gfx.rlc_fw;
1114 header = (const struct common_firmware_header *)info->fw->data;
1115 adev->firmware.fw_size +=
1116 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1118 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1119 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1120 info->fw = adev->gfx.mec_fw;
1121 header = (const struct common_firmware_header *)info->fw->data;
1122 adev->firmware.fw_size +=
1123 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1125 /* we need account JT in */
1126 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1127 adev->firmware.fw_size +=
1128 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1130 if (amdgpu_sriov_vf(adev)) {
/* SR-IOV guests reserve extra storage space alongside the MEC image. */
1131 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1132 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1133 info->fw = adev->gfx.mec_fw;
1134 adev->firmware.fw_size +=
1135 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1138 if (adev->gfx.mec2_fw) {
1139 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1140 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1141 info->fw = adev->gfx.mec2_fw;
1142 header = (const struct common_firmware_header *)info->fw->data;
1143 adev->firmware.fw_size +=
1144 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
/* Error path: release everything on any failed load/validate. */
1150 "gfx8: Failed to load firmware \"%s\"\n",
1152 release_firmware(adev->gfx.pfp_fw);
1153 adev->gfx.pfp_fw = NULL;
1154 release_firmware(adev->gfx.me_fw);
1155 adev->gfx.me_fw = NULL;
1156 release_firmware(adev->gfx.ce_fw);
1157 adev->gfx.ce_fw = NULL;
1158 release_firmware(adev->gfx.rlc_fw);
1159 adev->gfx.rlc_fw = NULL;
1160 release_firmware(adev->gfx.mec_fw);
1161 adev->gfx.mec_fw = NULL;
1162 release_firmware(adev->gfx.mec2_fw);
1163 adev->gfx.mec2_fw = NULL;
/* Serialize the RLC clear-state block (CSB) into @buffer as a stream of
 * PM4 packets: PREAMBLE begin, CONTEXT_CONTROL, every SECT_CONTEXT extent
 * from adev->gfx.rlc.cs_data, the PA_SC_RASTER_CONFIG pair, PREAMBLE end,
 * and a final CLEAR_STATE. All dwords are emitted little-endian.
 * NOTE(review): the count variable declaration and some braces are elided
 * in this excerpt. */
1168 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1169 volatile u32 *buffer)
1172 const struct cs_section_def *sect = NULL;
1173 const struct cs_extent_def *ext = NULL;
/* Nothing to emit without a clear-state table. */
1175 if (adev->gfx.rlc.cs_data == NULL)
1180 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1181 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1183 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1184 buffer[count++] = cpu_to_le32(0x80000000);
1185 buffer[count++] = cpu_to_le32(0x80000000);
/* Emit each context-register extent as one SET_CONTEXT_REG packet. */
1187 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1188 for (ext = sect->section; ext->extent != NULL; ++ext) {
1189 if (sect->id == SECT_CONTEXT) {
1191 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1192 buffer[count++] = cpu_to_le32(ext->reg_index -
1193 PACKET3_SET_CONTEXT_REG_START);
1194 for (i = 0; i < ext->reg_count; i++)
1195 buffer[count++] = cpu_to_le32(ext->extent[i]);
/* Raster config for SE0/SH0 (two consecutive registers). */
1202 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1203 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1204 PACKET3_SET_CONTEXT_REG_START);
1205 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1206 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1208 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1209 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1211 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1212 buffer[count++] = cpu_to_le32(0);
/* Number of CP jump-table entries for this ASIC; Carrizo is special-cased.
 * NOTE(review): the return statements are elided in this excerpt. */
1215 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1217 if (adev->asic_type == CHIP_CARRIZO)
/* RLC software init: hook up the VI clear-state table, allocate the clear
 * state block, allocate the CP jump table + GDS backup for Carrizo/Stoney,
 * and program the initial SPM VMID.
 * NOTE(review): error-check/return lines are elided in this excerpt. */
1223 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1225 const struct cs_section_def *cs_data;
1228 adev->gfx.rlc.cs_data = vi_cs_data;
1230 cs_data = adev->gfx.rlc.cs_data;
1233 /* init clear state block */
1234 r = amdgpu_gfx_rlc_init_csb(adev);
/* Only APUs with a CP jump table need the cp_table buffer. */
1239 if ((adev->asic_type == CHIP_CARRIZO) ||
1240 (adev->asic_type == CHIP_STONEY)) {
1241 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1242 r = amdgpu_gfx_rlc_init_cpt(adev);
1247 /* init spm vmid with 0xf */
1248 if (adev->gfx.rlc.funcs->update_spm_vmid)
1249 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
/* Tear down the MEC HPD EOP buffer allocated in gfx_v8_0_mec_init(). */
1254 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1256 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
/* MEC software init: claim the compute queues this driver will own, then
 * allocate, map and zero one GFX8_MEC_HPD_SIZE EOP buffer slot per compute
 * ring in VRAM.
 * NOTE(review): error-check/return lines are elided in this excerpt. */
1259 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1263 size_t mec_hpd_size;
1265 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1267 /* take ownership of the relevant compute queues */
1268 amdgpu_gfx_compute_queue_acquire(adev);
1270 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1272 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1273 AMDGPU_GEM_DOMAIN_VRAM,
1274 &adev->gfx.mec.hpd_eop_obj,
1275 &adev->gfx.mec.hpd_eop_gpu_addr,
1278 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
/* Start all EOP slots cleared before unmapping and unreserving. */
1282 memset(hpd, 0, mec_hpd_size);
1284 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1285 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
/* GCN machine-code words for the compute shader that initializes VGPRs;
 * copied into an IB by gfx_v8_0_do_edc_gpr_workarounds() (Carrizo-only
 * EDC workaround). Do not edit the words by hand. */
1290 static const u32 vgpr_init_compute_shader[] =
1292 0x7e000209, 0x7e020208,
1293 0x7e040207, 0x7e060206,
1294 0x7e080205, 0x7e0a0204,
1295 0x7e0c0203, 0x7e0e0202,
1296 0x7e100201, 0x7e120200,
1297 0x7e140209, 0x7e160208,
1298 0x7e180207, 0x7e1a0206,
1299 0x7e1c0205, 0x7e1e0204,
1300 0x7e200203, 0x7e220202,
1301 0x7e240201, 0x7e260200,
1302 0x7e280209, 0x7e2a0208,
1303 0x7e2c0207, 0x7e2e0206,
1304 0x7e300205, 0x7e320204,
1305 0x7e340203, 0x7e360202,
1306 0x7e380201, 0x7e3a0200,
1307 0x7e3c0209, 0x7e3e0208,
1308 0x7e400207, 0x7e420206,
1309 0x7e440205, 0x7e460204,
1310 0x7e480203, 0x7e4a0202,
1311 0x7e4c0201, 0x7e4e0200,
1312 0x7e500209, 0x7e520208,
1313 0x7e540207, 0x7e560206,
1314 0x7e580205, 0x7e5a0204,
1315 0x7e5c0203, 0x7e5e0202,
1316 0x7e600201, 0x7e620200,
1317 0x7e640209, 0x7e660208,
1318 0x7e680207, 0x7e6a0206,
1319 0x7e6c0205, 0x7e6e0204,
1320 0x7e700203, 0x7e720202,
1321 0x7e740201, 0x7e760200,
1322 0x7e780209, 0x7e7a0208,
1323 0x7e7c0207, 0x7e7e0206,
1324 0xbf8a0000, 0xbf810000,
/* GCN machine-code words for the compute shader that initializes SGPRs;
 * dispatched twice (sgpr1/sgpr2 register sets) by
 * gfx_v8_0_do_edc_gpr_workarounds(). Do not edit the words by hand. */
1327 static const u32 sgpr_init_compute_shader[] =
1329 0xbe8a0100, 0xbe8c0102,
1330 0xbe8e0104, 0xbe900106,
1331 0xbe920108, 0xbe940100,
1332 0xbe960102, 0xbe980104,
1333 0xbe9a0106, 0xbe9c0108,
1334 0xbe9e0100, 0xbea00102,
1335 0xbea20104, 0xbea40106,
1336 0xbea60108, 0xbea80100,
1337 0xbeaa0102, 0xbeac0104,
1338 0xbeae0106, 0xbeb00108,
1339 0xbeb20100, 0xbeb40102,
1340 0xbeb60104, 0xbeb80106,
1341 0xbeba0108, 0xbebc0100,
1342 0xbebe0102, 0xbec00104,
1343 0xbec20106, 0xbec40108,
1344 0xbec60100, 0xbec80102,
1345 0xbee60004, 0xbee70005,
1346 0xbeea0006, 0xbeeb0007,
1347 0xbee80008, 0xbee90009,
1348 0xbefc0000, 0xbf8a0000,
1349 0xbf810000, 0x00000000,
/* (register, value) pairs programmed via SET_SH_REG packets before the
 * VGPR-init dispatch in gfx_v8_0_do_edc_gpr_workarounds(). */
1352 static const u32 vgpr_init_regs[] =
1354 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1355 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1356 mmCOMPUTE_NUM_THREAD_X, 256*4,
1357 mmCOMPUTE_NUM_THREAD_Y, 1,
1358 mmCOMPUTE_NUM_THREAD_Z, 1,
1359 mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1360 mmCOMPUTE_PGM_RSRC2, 20,
1361 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1362 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1363 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1364 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1365 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1366 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1367 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1368 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1369 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1370 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/* (register, value) pairs for the first SGPR-init dispatch; thread
 * management mask 0x0f targets the low CUs (second pass uses 0xf0). */
1373 static const u32 sgpr1_init_regs[] =
1375 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1376 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1377 mmCOMPUTE_NUM_THREAD_X, 256*5,
1378 mmCOMPUTE_NUM_THREAD_Y, 1,
1379 mmCOMPUTE_NUM_THREAD_Z, 1,
1380 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1381 mmCOMPUTE_PGM_RSRC2, 20,
1382 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1383 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1384 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1385 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1386 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1387 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1388 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1389 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1390 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1391 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/* (register, value) pairs for the second SGPR-init dispatch; identical to
 * sgpr1_init_regs except the thread management mask 0xf0 covers the
 * remaining CUs. */
1394 static const u32 sgpr2_init_regs[] =
1396 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1397 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1398 mmCOMPUTE_NUM_THREAD_X, 256*5,
1399 mmCOMPUTE_NUM_THREAD_Y, 1,
1400 mmCOMPUTE_NUM_THREAD_Z, 1,
1401 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1402 mmCOMPUTE_PGM_RSRC2, 20,
1403 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1404 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1405 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1406 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1407 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1408 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1409 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1410 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1411 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1412 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/* SEC/DED EDC counter registers; read back (to clear) at the end of
 * gfx_v8_0_do_edc_gpr_workarounds().
 * NOTE(review): several entries are elided in this excerpt. */
1415 static const u32 sec_ded_counter_registers[] =
1418 mmCPC_EDC_SCRATCH_CNT,
1419 mmCPC_EDC_UCODE_CNT,
1426 mmDC_EDC_CSINVOC_CNT,
1427 mmDC_EDC_RESTORE_CNT,
1433 mmSQC_ATC_EDC_GATCL1_CNT,
1439 mmTCP_ATC_EDC_GATCL1_CNT,
/* Carrizo-only EDC GPR workaround: build one IB that dispatches the
 * VGPR-init shader once and the SGPR-init shader twice (low/high CU
 * masks), each preceded by its SET_SH_REG register state and followed by
 * a CS partial flush; run it, wait for the fence, enable EDC modes in
 * GB_EDC_MODE/CC_GC_EDC_CONFIG, then read back the EDC counters to clear
 * them. Returns 0 on success; bails out early on non-Carrizo ASICs or if
 * the compute ring is not ready.
 * NOTE(review): several declarations/returns are elided in this excerpt. */
1444 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1446 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1447 struct amdgpu_ib ib;
1448 struct dma_fence *f = NULL;
1451 unsigned total_size, vgpr_offset, sgpr_offset;
1454 /* only supported on CZ */
1455 if (adev->asic_type != CHIP_CARRIZO)
1458 /* bail if the compute ring is not ready */
1459 if (!ring->sched.ready)
/* Save current EDC mode and disable it while the workaround runs. */
1462 tmp = RREG32(mmGB_EDC_MODE);
1463 WREG32(mmGB_EDC_MODE, 0);
/* Command stream size: per dispatch, 3 dwords per reg pair plus the
 * PGM address (4), dispatch (5) and event (2) packets, in bytes. */
1466 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1468 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1470 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1471 total_size = ALIGN(total_size, 256);
1472 vgpr_offset = total_size;
1473 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1474 sgpr_offset = total_size;
1475 total_size += sizeof(sgpr_init_compute_shader);
1477 /* allocate an indirect buffer to put the commands in */
1478 memset(&ib, 0, sizeof(ib));
1479 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1481 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1485 /* load the compute shaders */
1486 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1487 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1489 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1490 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1492 /* init the ib length to 0 */
/* ---- Dispatch 1: VGPR init ---- */
1496 /* write the register state for the compute dispatch */
1497 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1498 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1499 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1500 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1502 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1503 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1504 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1505 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1506 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1507 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1509 /* write dispatch packet */
1510 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1511 ib.ptr[ib.length_dw++] = 8; /* x */
1512 ib.ptr[ib.length_dw++] = 1; /* y */
1513 ib.ptr[ib.length_dw++] = 1; /* z */
1514 ib.ptr[ib.length_dw++] =
1515 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1517 /* write CS partial flush packet */
1518 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1519 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* ---- Dispatch 2: SGPR init, low CU mask (sgpr1_init_regs) ---- */
1522 /* write the register state for the compute dispatch */
1523 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1524 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1525 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1526 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1528 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1529 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1530 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1531 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1532 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1533 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1535 /* write dispatch packet */
1536 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1537 ib.ptr[ib.length_dw++] = 8; /* x */
1538 ib.ptr[ib.length_dw++] = 1; /* y */
1539 ib.ptr[ib.length_dw++] = 1; /* z */
1540 ib.ptr[ib.length_dw++] =
1541 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1543 /* write CS partial flush packet */
1544 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1545 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* ---- Dispatch 3: SGPR init, high CU mask (sgpr2_init_regs) ---- */
1548 /* write the register state for the compute dispatch */
1549 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1550 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1551 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1552 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1554 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1555 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1556 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1557 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1558 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1559 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1561 /* write dispatch packet */
1562 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1563 ib.ptr[ib.length_dw++] = 8; /* x */
1564 ib.ptr[ib.length_dw++] = 1; /* y */
1565 ib.ptr[ib.length_dw++] = 1; /* z */
1566 ib.ptr[ib.length_dw++] =
1567 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1569 /* write CS partial flush packet */
1570 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1571 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1573 /* shedule the ib on the ring */
1574 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1576 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1580 /* wait for the GPU to finish processing the IB */
1581 r = dma_fence_wait(f, false);
1583 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
/* Re-enable EDC on top of the mode saved at entry. */
1587 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1588 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1589 WREG32(mmGB_EDC_MODE, tmp);
1591 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1592 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1593 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1596 /* read back registers to clear the counters */
1597 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1598 RREG32(sec_ded_counter_registers[i]);
1601 amdgpu_ib_free(adev, &ib, NULL);
/* Early GFX configuration: fill adev->gfx.config with per-ASIC limits
 * (shader engines, tile pipes, CUs, RBs, caches, FIFO sizes), pick the
 * golden GB_ADDR_CONFIG value, read bank/rank counts from MC_ARB_RAMCFG,
 * derive the memory row size (from DIMM address-map fuses on APUs, from
 * NOOFCOLS on dGPUs), and patch ROW_SIZE back into gb_addr_config.
 * NOTE(review): switch-case labels and some lines are elided in this
 * excerpt; each settings group below corresponds to one ASIC. */
1607 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1611 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1615 switch (adev->asic_type) {
/* Topaz-class settings (TOPAZ_GB_ADDR_CONFIG_GOLDEN). */
1617 adev->gfx.config.max_shader_engines = 1;
1618 adev->gfx.config.max_tile_pipes = 2;
1619 adev->gfx.config.max_cu_per_sh = 6;
1620 adev->gfx.config.max_sh_per_se = 1;
1621 adev->gfx.config.max_backends_per_se = 2;
1622 adev->gfx.config.max_texture_channel_caches = 2;
1623 adev->gfx.config.max_gprs = 256;
1624 adev->gfx.config.max_gs_threads = 32;
1625 adev->gfx.config.max_hw_contexts = 8;
1627 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1628 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1629 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1630 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1631 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
/* Large-dGPU settings (4 SEs, 16 pipes). */
1634 adev->gfx.config.max_shader_engines = 4;
1635 adev->gfx.config.max_tile_pipes = 16;
1636 adev->gfx.config.max_cu_per_sh = 16;
1637 adev->gfx.config.max_sh_per_se = 1;
1638 adev->gfx.config.max_backends_per_se = 4;
1639 adev->gfx.config.max_texture_channel_caches = 16;
1640 adev->gfx.config.max_gprs = 256;
1641 adev->gfx.config.max_gs_threads = 32;
1642 adev->gfx.config.max_hw_contexts = 8;
1644 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1645 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1646 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1647 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1648 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* Polaris11/12 read SE/CU/RB counts from the VBIOS via atombios. */
1650 case CHIP_POLARIS11:
1651 case CHIP_POLARIS12:
1652 ret = amdgpu_atombios_get_gfx_info(adev);
1655 adev->gfx.config.max_gprs = 256;
1656 adev->gfx.config.max_gs_threads = 32;
1657 adev->gfx.config.max_hw_contexts = 8;
1659 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1660 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1661 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1662 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1663 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1665 case CHIP_POLARIS10:
1667 ret = amdgpu_atombios_get_gfx_info(adev);
1670 adev->gfx.config.max_gprs = 256;
1671 adev->gfx.config.max_gs_threads = 32;
1672 adev->gfx.config.max_hw_contexts = 8;
1674 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1675 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1676 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1677 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1678 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* Mid-range dGPU settings (4 SEs, 8 pipes). */
1681 adev->gfx.config.max_shader_engines = 4;
1682 adev->gfx.config.max_tile_pipes = 8;
1683 adev->gfx.config.max_cu_per_sh = 8;
1684 adev->gfx.config.max_sh_per_se = 1;
1685 adev->gfx.config.max_backends_per_se = 2;
1686 adev->gfx.config.max_texture_channel_caches = 8;
1687 adev->gfx.config.max_gprs = 256;
1688 adev->gfx.config.max_gs_threads = 32;
1689 adev->gfx.config.max_hw_contexts = 8;
1691 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1692 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1693 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1694 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1695 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* APU settings using CARRIZO_GB_ADDR_CONFIG_GOLDEN (8 CUs/SH). */
1698 adev->gfx.config.max_shader_engines = 1;
1699 adev->gfx.config.max_tile_pipes = 2;
1700 adev->gfx.config.max_sh_per_se = 1;
1701 adev->gfx.config.max_backends_per_se = 2;
1702 adev->gfx.config.max_cu_per_sh = 8;
1703 adev->gfx.config.max_texture_channel_caches = 2;
1704 adev->gfx.config.max_gprs = 256;
1705 adev->gfx.config.max_gs_threads = 32;
1706 adev->gfx.config.max_hw_contexts = 8;
1708 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1709 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1710 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1711 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1712 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
/* Small-APU settings (3 CUs/SH, 16 GS threads). */
1715 adev->gfx.config.max_shader_engines = 1;
1716 adev->gfx.config.max_tile_pipes = 2;
1717 adev->gfx.config.max_sh_per_se = 1;
1718 adev->gfx.config.max_backends_per_se = 1;
1719 adev->gfx.config.max_cu_per_sh = 3;
1720 adev->gfx.config.max_texture_channel_caches = 2;
1721 adev->gfx.config.max_gprs = 256;
1722 adev->gfx.config.max_gs_threads = 16;
1723 adev->gfx.config.max_hw_contexts = 8;
1725 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1726 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1727 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1728 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1729 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
/* Default/fallback settings (2 SEs, 4 pipes). */
1732 adev->gfx.config.max_shader_engines = 2;
1733 adev->gfx.config.max_tile_pipes = 4;
1734 adev->gfx.config.max_cu_per_sh = 2;
1735 adev->gfx.config.max_sh_per_se = 1;
1736 adev->gfx.config.max_backends_per_se = 2;
1737 adev->gfx.config.max_texture_channel_caches = 4;
1738 adev->gfx.config.max_gprs = 256;
1739 adev->gfx.config.max_gs_threads = 32;
1740 adev->gfx.config.max_hw_contexts = 8;
1742 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1743 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1744 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1745 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1746 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* Bank/rank counts come straight from the memory controller. */
1750 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1751 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1753 adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1754 MC_ARB_RAMCFG, NOOFBANK);
1755 adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1756 MC_ARB_RAMCFG, NOOFRANKS);
1758 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1759 adev->gfx.config.mem_max_burst_length_bytes = 256;
1760 if (adev->flags & AMD_IS_APU) {
1761 /* Get memory bank mapping mode. */
1762 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1763 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1764 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1766 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1767 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1768 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1770 /* Validate settings in case only one DIMM installed. */
1771 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1772 dimm00_addr_map = 0;
1773 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1774 dimm01_addr_map = 0;
1775 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1776 dimm10_addr_map = 0;
1777 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1778 dimm11_addr_map = 0;
1780 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1781 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1782 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1783 adev->gfx.config.mem_row_size_in_kb = 2;
1785 adev->gfx.config.mem_row_size_in_kb = 1;
/* dGPU path: derive row size from the column count, capped at 4KB. */
1787 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1788 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1789 if (adev->gfx.config.mem_row_size_in_kb > 4)
1790 adev->gfx.config.mem_row_size_in_kb = 4;
1793 adev->gfx.config.shader_engine_tile_size = 32;
1794 adev->gfx.config.num_gpus = 1;
1795 adev->gfx.config.multi_gpu_tile_size = 64;
1797 /* fix up row size */
1798 switch (adev->gfx.config.mem_row_size_in_kb) {
1801 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1804 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1807 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1810 adev->gfx.config.gb_addr_config = gb_addr_config;
1815 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1816 int mec, int pipe, int queue)
1820 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1822 ring = &adev->gfx.compute_ring[ring_id];
1827 ring->queue = queue;
1829 ring->ring_obj = NULL;
1830 ring->use_doorbell = true;
1831 ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1832 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1833 + (ring_id * GFX8_MEC_HPD_SIZE);
1834 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1836 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1837 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1840 /* type-2 packets are deprecated on MEC, use type-3 instead */
1841 r = amdgpu_ring_init(adev, ring, 1024,
1842 &adev->gfx.eop_irq, irq_type);
1850 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1852 static int gfx_v8_0_sw_init(void *handle)
1854 int i, j, k, r, ring_id;
1855 struct amdgpu_ring *ring;
1856 struct amdgpu_kiq *kiq;
1857 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1859 switch (adev->asic_type) {
1863 case CHIP_POLARIS10:
1864 case CHIP_POLARIS11:
1865 case CHIP_POLARIS12:
1867 adev->gfx.mec.num_mec = 2;
1872 adev->gfx.mec.num_mec = 1;
1876 adev->gfx.mec.num_pipe_per_mec = 4;
1877 adev->gfx.mec.num_queue_per_pipe = 8;
1880 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1884 /* Privileged reg */
1885 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1886 &adev->gfx.priv_reg_irq);
1890 /* Privileged inst */
1891 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1892 &adev->gfx.priv_inst_irq);
1896 /* Add CP EDC/ECC irq */
1897 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1898 &adev->gfx.cp_ecc_error_irq);
1902 /* SQ interrupts. */
1903 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1906 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1910 INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1912 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1914 gfx_v8_0_scratch_init(adev);
1916 r = gfx_v8_0_init_microcode(adev);
1918 DRM_ERROR("Failed to load gfx firmware!\n");
1922 r = adev->gfx.rlc.funcs->init(adev);
1924 DRM_ERROR("Failed to init rlc BOs!\n");
1928 r = gfx_v8_0_mec_init(adev);
1930 DRM_ERROR("Failed to init MEC BOs!\n");
1934 /* set up the gfx ring */
1935 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1936 ring = &adev->gfx.gfx_ring[i];
1937 ring->ring_obj = NULL;
1938 sprintf(ring->name, "gfx");
1939 /* no gfx doorbells on iceland */
1940 if (adev->asic_type != CHIP_TOPAZ) {
1941 ring->use_doorbell = true;
1942 ring->doorbell_index = adev->doorbell_index.gfx_ring0;
1945 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
1946 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
1952 /* set up the compute queues - allocate horizontally across pipes */
1954 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1955 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1956 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1957 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1960 r = gfx_v8_0_compute_ring_init(adev,
1971 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
1973 DRM_ERROR("Failed to init KIQ BOs!\n");
1977 kiq = &adev->gfx.kiq;
1978 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1982 /* create MQD for all compute queues as well as KIQ for SRIOV case */
1983 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
1987 adev->gfx.ce_ram_size = 0x8000;
1989 r = gfx_v8_0_gpu_early_init(adev);
1996 static int gfx_v8_0_sw_fini(void *handle)
1998 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2001 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2002 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2003 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2004 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2006 amdgpu_gfx_mqd_sw_fini(adev);
2007 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2008 amdgpu_gfx_kiq_fini(adev);
2010 gfx_v8_0_mec_fini(adev);
2011 amdgpu_gfx_rlc_fini(adev);
2012 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2013 &adev->gfx.rlc.clear_state_gpu_addr,
2014 (void **)&adev->gfx.rlc.cs_ptr);
2015 if ((adev->asic_type == CHIP_CARRIZO) ||
2016 (adev->asic_type == CHIP_STONEY)) {
2017 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2018 &adev->gfx.rlc.cp_table_gpu_addr,
2019 (void **)&adev->gfx.rlc.cp_table_ptr);
2021 gfx_v8_0_free_microcode(adev);
2026 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2028 uint32_t *modearray, *mod2array;
2029 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2030 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2033 modearray = adev->gfx.config.tile_mode_array;
2034 mod2array = adev->gfx.config.macrotile_mode_array;
2036 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2037 modearray[reg_offset] = 0;
2039 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2040 mod2array[reg_offset] = 0;
2042 switch (adev->asic_type) {
2044 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2045 PIPE_CONFIG(ADDR_SURF_P2) |
2046 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2047 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2048 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2049 PIPE_CONFIG(ADDR_SURF_P2) |
2050 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2051 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2052 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2053 PIPE_CONFIG(ADDR_SURF_P2) |
2054 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2055 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2056 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2057 PIPE_CONFIG(ADDR_SURF_P2) |
2058 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2059 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2060 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2061 PIPE_CONFIG(ADDR_SURF_P2) |
2062 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2063 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2064 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2065 PIPE_CONFIG(ADDR_SURF_P2) |
2066 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2067 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2068 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2069 PIPE_CONFIG(ADDR_SURF_P2) |
2070 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2071 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2072 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2073 PIPE_CONFIG(ADDR_SURF_P2));
2074 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2075 PIPE_CONFIG(ADDR_SURF_P2) |
2076 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2077 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2078 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2079 PIPE_CONFIG(ADDR_SURF_P2) |
2080 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2081 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2082 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2083 PIPE_CONFIG(ADDR_SURF_P2) |
2084 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2085 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2086 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2087 PIPE_CONFIG(ADDR_SURF_P2) |
2088 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2089 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2090 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2091 PIPE_CONFIG(ADDR_SURF_P2) |
2092 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2093 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2094 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2095 PIPE_CONFIG(ADDR_SURF_P2) |
2096 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2097 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2098 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2099 PIPE_CONFIG(ADDR_SURF_P2) |
2100 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2101 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2102 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2103 PIPE_CONFIG(ADDR_SURF_P2) |
2104 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2105 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2106 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2107 PIPE_CONFIG(ADDR_SURF_P2) |
2108 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2109 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2110 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2111 PIPE_CONFIG(ADDR_SURF_P2) |
2112 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2113 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2114 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2115 PIPE_CONFIG(ADDR_SURF_P2) |
2116 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2117 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2118 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2119 PIPE_CONFIG(ADDR_SURF_P2) |
2120 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2121 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2122 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2123 PIPE_CONFIG(ADDR_SURF_P2) |
2124 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2125 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2126 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2127 PIPE_CONFIG(ADDR_SURF_P2) |
2128 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2129 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2130 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2131 PIPE_CONFIG(ADDR_SURF_P2) |
2132 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2133 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2134 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2135 PIPE_CONFIG(ADDR_SURF_P2) |
2136 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2137 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2138 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2139 PIPE_CONFIG(ADDR_SURF_P2) |
2140 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2141 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2142 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2143 PIPE_CONFIG(ADDR_SURF_P2) |
2144 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2145 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2147 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2148 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2149 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2150 NUM_BANKS(ADDR_SURF_8_BANK));
2151 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2152 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2153 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2154 NUM_BANKS(ADDR_SURF_8_BANK));
2155 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2156 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2157 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2158 NUM_BANKS(ADDR_SURF_8_BANK));
2159 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2160 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2161 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2162 NUM_BANKS(ADDR_SURF_8_BANK));
2163 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2164 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2165 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2166 NUM_BANKS(ADDR_SURF_8_BANK));
2167 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2169 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2170 NUM_BANKS(ADDR_SURF_8_BANK));
2171 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2172 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2173 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2174 NUM_BANKS(ADDR_SURF_8_BANK));
2175 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2176 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2177 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2178 NUM_BANKS(ADDR_SURF_16_BANK));
2179 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2180 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2181 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2182 NUM_BANKS(ADDR_SURF_16_BANK));
2183 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2184 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2185 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2186 NUM_BANKS(ADDR_SURF_16_BANK));
2187 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2188 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2189 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2190 NUM_BANKS(ADDR_SURF_16_BANK));
2191 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2193 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2194 NUM_BANKS(ADDR_SURF_16_BANK));
2195 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2196 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2197 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2198 NUM_BANKS(ADDR_SURF_16_BANK));
2199 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2200 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2201 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2202 NUM_BANKS(ADDR_SURF_8_BANK));
2204 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2205 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2207 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2209 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2210 if (reg_offset != 7)
2211 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2216 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2217 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2218 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2219 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2220 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2221 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2222 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2223 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2224 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2225 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2226 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2227 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2228 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2230 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2231 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2232 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2233 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2234 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2235 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2237 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2238 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2239 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2240 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2241 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2242 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2243 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2244 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2245 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2246 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2247 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2248 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2249 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2250 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2251 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2252 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2254 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2255 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2256 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2257 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2258 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2259 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2260 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2262 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2263 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2264 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2266 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2267 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2268 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2270 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2271 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2272 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2274 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2275 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2276 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2278 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2279 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2280 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2282 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2283 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2284 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2286 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2287 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2290 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2291 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2295 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2299 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2302 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2303 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2307 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2308 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2311 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2314 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2315 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2318 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2319 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2322 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2323 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2330 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2331 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2333 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2334 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2335 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2336 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2337 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2339 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2341 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2342 NUM_BANKS(ADDR_SURF_8_BANK));
2343 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2345 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2346 NUM_BANKS(ADDR_SURF_8_BANK));
2347 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2350 NUM_BANKS(ADDR_SURF_8_BANK));
2351 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354 NUM_BANKS(ADDR_SURF_8_BANK));
2355 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2358 NUM_BANKS(ADDR_SURF_8_BANK));
2359 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2362 NUM_BANKS(ADDR_SURF_8_BANK));
2363 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2366 NUM_BANKS(ADDR_SURF_8_BANK));
2367 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2370 NUM_BANKS(ADDR_SURF_8_BANK));
2371 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2374 NUM_BANKS(ADDR_SURF_8_BANK));
2375 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2378 NUM_BANKS(ADDR_SURF_8_BANK));
2379 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2382 NUM_BANKS(ADDR_SURF_8_BANK));
2383 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2386 NUM_BANKS(ADDR_SURF_8_BANK));
2387 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2388 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2389 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2390 NUM_BANKS(ADDR_SURF_8_BANK));
2391 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2393 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2394 NUM_BANKS(ADDR_SURF_4_BANK));
2396 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2397 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2399 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2400 if (reg_offset != 7)
2401 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2405 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2407 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2408 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2409 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2410 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2411 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2412 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2413 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2414 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2415 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2416 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2417 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2419 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2420 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2421 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2423 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2424 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2425 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2427 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2428 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2429 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2430 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2431 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2432 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2433 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2434 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2435 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2436 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2437 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2438 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2439 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2440 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2441 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2443 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2444 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2445 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2446 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2447 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2448 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2449 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2450 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2451 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2452 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2453 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2454 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2455 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2456 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2457 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2458 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2459 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2460 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2461 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2462 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2464 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2465 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2467 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2468 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2469 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2470 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2471 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2472 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2473 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2474 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2475 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2476 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2477 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2478 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2479 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2480 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2482 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2483 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2484 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2485 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2486 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2487 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2488 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2490 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2491 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2492 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2494 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2495 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2496 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2497 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2498 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2499 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2500 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2502 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2503 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2504 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2506 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2507 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2508 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2510 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2511 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2512 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2513 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2514 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2515 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2517 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2518 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2519 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2520 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2522 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2523 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2524 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2525 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2526 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2528 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2530 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2531 NUM_BANKS(ADDR_SURF_16_BANK));
2532 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2534 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2535 NUM_BANKS(ADDR_SURF_16_BANK));
2536 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2538 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2539 NUM_BANKS(ADDR_SURF_16_BANK));
2540 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2541 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2542 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2543 NUM_BANKS(ADDR_SURF_16_BANK));
2544 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2546 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2547 NUM_BANKS(ADDR_SURF_16_BANK));
2548 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2550 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2551 NUM_BANKS(ADDR_SURF_16_BANK));
2552 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2553 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2554 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2555 NUM_BANKS(ADDR_SURF_16_BANK));
2556 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2558 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2559 NUM_BANKS(ADDR_SURF_16_BANK));
2560 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2562 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2563 NUM_BANKS(ADDR_SURF_16_BANK));
2564 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2565 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2566 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2567 NUM_BANKS(ADDR_SURF_16_BANK));
2568 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2570 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2571 NUM_BANKS(ADDR_SURF_16_BANK));
2572 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2573 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2574 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2575 NUM_BANKS(ADDR_SURF_8_BANK));
2576 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2577 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2578 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2579 NUM_BANKS(ADDR_SURF_4_BANK));
2580 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2581 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2582 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2583 NUM_BANKS(ADDR_SURF_4_BANK));
2585 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2586 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2588 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2589 if (reg_offset != 7)
2590 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2593 case CHIP_POLARIS11:
2594 case CHIP_POLARIS12:
2595 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2597 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2598 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2599 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2600 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2601 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2602 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2603 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2605 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2606 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2607 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2608 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2609 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2610 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2611 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2612 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2613 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2614 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2615 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2616 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2617 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2618 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2619 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2620 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2621 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2622 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2623 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2624 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2625 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2626 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2627 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2628 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2629 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2630 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2631 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2632 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2633 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2634 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2635 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2636 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2637 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2638 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2639 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2640 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2641 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2642 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2643 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2644 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2645 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2646 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2647 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2648 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2649 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2650 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2651 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2652 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2653 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2654 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2655 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2656 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2658 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2660 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2661 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2662 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2664 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2665 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2666 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2667 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2668 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2669 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2670 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2672 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2673 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2674 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2676 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2677 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2678 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2680 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2681 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2682 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2684 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2685 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2686 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2688 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2689 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2690 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2692 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2693 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2694 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2696 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2697 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2698 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2700 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2701 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2702 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2704 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2705 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2708 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2709 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2710 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2712 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2713 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2714 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2716 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2718 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2719 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2720 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2721 NUM_BANKS(ADDR_SURF_16_BANK));
2723 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2724 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2725 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2726 NUM_BANKS(ADDR_SURF_16_BANK));
2728 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2729 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2730 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2731 NUM_BANKS(ADDR_SURF_16_BANK));
2733 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2734 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2735 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2736 NUM_BANKS(ADDR_SURF_16_BANK));
2738 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2741 NUM_BANKS(ADDR_SURF_16_BANK));
2743 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2745 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2746 NUM_BANKS(ADDR_SURF_16_BANK));
2748 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2749 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2750 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2751 NUM_BANKS(ADDR_SURF_16_BANK));
2753 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2754 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2755 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2756 NUM_BANKS(ADDR_SURF_16_BANK));
2758 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2759 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2760 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2761 NUM_BANKS(ADDR_SURF_16_BANK));
2763 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2765 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2766 NUM_BANKS(ADDR_SURF_16_BANK));
2768 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2770 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2771 NUM_BANKS(ADDR_SURF_16_BANK));
2773 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2774 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2775 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2776 NUM_BANKS(ADDR_SURF_16_BANK));
2778 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2779 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2780 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2781 NUM_BANKS(ADDR_SURF_8_BANK));
2783 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2784 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2785 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2786 NUM_BANKS(ADDR_SURF_4_BANK));
2788 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2789 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2791 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2792 if (reg_offset != 7)
2793 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2796 case CHIP_POLARIS10:
2797 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2799 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2800 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2801 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2802 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2803 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2804 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2805 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2806 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2807 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2808 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2809 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2810 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2811 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2812 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2813 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2814 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2815 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2816 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2817 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2818 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2819 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2820 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2821 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2822 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2823 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2824 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2825 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2826 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2827 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2828 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2829 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2830 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2831 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2832 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2833 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2834 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2835 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2836 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2837 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2838 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2839 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2840 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2841 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2842 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2843 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2844 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2846 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2847 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2848 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2849 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2850 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2852 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2853 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2854 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2855 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2856 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2857 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2858 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2859 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2860 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2861 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2862 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2863 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2864 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2866 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2867 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2868 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2869 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2870 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2871 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2872 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2873 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2874 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2875 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2876 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2877 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2878 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2879 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2880 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2882 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2883 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2884 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2885 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2886 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2887 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2888 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2889 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2890 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2891 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2892 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2894 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2895 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2896 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2898 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2899 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2900 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2901 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2902 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2903 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2904 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2906 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2907 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2908 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2910 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2911 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2912 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2914 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2915 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2916 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2917 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2918 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2920 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2922 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2923 NUM_BANKS(ADDR_SURF_16_BANK));
2925 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2926 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2927 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2928 NUM_BANKS(ADDR_SURF_16_BANK));
2930 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2931 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2932 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2933 NUM_BANKS(ADDR_SURF_16_BANK));
2935 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2936 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2937 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2938 NUM_BANKS(ADDR_SURF_16_BANK));
2940 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2941 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2942 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2943 NUM_BANKS(ADDR_SURF_16_BANK));
2945 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2946 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2947 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2948 NUM_BANKS(ADDR_SURF_16_BANK));
2950 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2951 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2952 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2953 NUM_BANKS(ADDR_SURF_16_BANK));
2955 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2957 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2958 NUM_BANKS(ADDR_SURF_16_BANK));
2960 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2961 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2962 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963 NUM_BANKS(ADDR_SURF_16_BANK));
2965 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2966 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2967 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2968 NUM_BANKS(ADDR_SURF_16_BANK));
2970 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2971 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2972 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2973 NUM_BANKS(ADDR_SURF_16_BANK));
2975 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2976 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2977 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2978 NUM_BANKS(ADDR_SURF_8_BANK));
2980 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2983 NUM_BANKS(ADDR_SURF_4_BANK));
2985 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2986 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2987 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2988 NUM_BANKS(ADDR_SURF_4_BANK));
2990 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2991 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2993 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2994 if (reg_offset != 7)
2995 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2999 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3000 PIPE_CONFIG(ADDR_SURF_P2) |
3001 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3002 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3003 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3004 PIPE_CONFIG(ADDR_SURF_P2) |
3005 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3006 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3007 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3008 PIPE_CONFIG(ADDR_SURF_P2) |
3009 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3010 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3011 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3012 PIPE_CONFIG(ADDR_SURF_P2) |
3013 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3014 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3015 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3016 PIPE_CONFIG(ADDR_SURF_P2) |
3017 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3018 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3019 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3020 PIPE_CONFIG(ADDR_SURF_P2) |
3021 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3022 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3023 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3024 PIPE_CONFIG(ADDR_SURF_P2) |
3025 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3026 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3027 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3028 PIPE_CONFIG(ADDR_SURF_P2));
3029 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3030 PIPE_CONFIG(ADDR_SURF_P2) |
3031 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3032 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3033 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3034 PIPE_CONFIG(ADDR_SURF_P2) |
3035 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3036 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3037 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3038 PIPE_CONFIG(ADDR_SURF_P2) |
3039 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3040 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3041 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3042 PIPE_CONFIG(ADDR_SURF_P2) |
3043 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3044 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3045 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3046 PIPE_CONFIG(ADDR_SURF_P2) |
3047 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3048 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3049 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3050 PIPE_CONFIG(ADDR_SURF_P2) |
3051 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3052 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3053 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3054 PIPE_CONFIG(ADDR_SURF_P2) |
3055 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3056 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3057 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3058 PIPE_CONFIG(ADDR_SURF_P2) |
3059 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3060 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3061 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3062 PIPE_CONFIG(ADDR_SURF_P2) |
3063 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3064 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3065 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3066 PIPE_CONFIG(ADDR_SURF_P2) |
3067 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3068 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3069 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3070 PIPE_CONFIG(ADDR_SURF_P2) |
3071 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3072 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3073 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3074 PIPE_CONFIG(ADDR_SURF_P2) |
3075 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3077 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3078 PIPE_CONFIG(ADDR_SURF_P2) |
3079 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3080 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3081 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3082 PIPE_CONFIG(ADDR_SURF_P2) |
3083 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3084 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3085 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3086 PIPE_CONFIG(ADDR_SURF_P2) |
3087 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3088 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3089 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3090 PIPE_CONFIG(ADDR_SURF_P2) |
3091 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3093 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3094 PIPE_CONFIG(ADDR_SURF_P2) |
3095 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3096 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3097 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3098 PIPE_CONFIG(ADDR_SURF_P2) |
3099 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3100 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3102 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3103 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3104 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3105 NUM_BANKS(ADDR_SURF_8_BANK));
3106 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3107 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3108 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3109 NUM_BANKS(ADDR_SURF_8_BANK));
3110 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3111 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3112 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3113 NUM_BANKS(ADDR_SURF_8_BANK));
3114 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3115 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3116 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3117 NUM_BANKS(ADDR_SURF_8_BANK));
3118 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3119 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3120 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3121 NUM_BANKS(ADDR_SURF_8_BANK));
3122 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3123 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3124 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3125 NUM_BANKS(ADDR_SURF_8_BANK));
3126 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3127 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3128 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3129 NUM_BANKS(ADDR_SURF_8_BANK));
3130 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3131 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3132 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3133 NUM_BANKS(ADDR_SURF_16_BANK));
3134 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3135 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3136 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3137 NUM_BANKS(ADDR_SURF_16_BANK));
3138 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3139 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3140 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3141 NUM_BANKS(ADDR_SURF_16_BANK));
3142 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3143 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3144 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3145 NUM_BANKS(ADDR_SURF_16_BANK));
3146 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3147 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3148 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3149 NUM_BANKS(ADDR_SURF_16_BANK));
3150 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3151 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3152 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3153 NUM_BANKS(ADDR_SURF_16_BANK));
3154 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3155 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3156 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3157 NUM_BANKS(ADDR_SURF_8_BANK));
3159 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3160 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3162 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3164 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3165 if (reg_offset != 7)
3166 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3171 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3176 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3177 PIPE_CONFIG(ADDR_SURF_P2) |
3178 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3179 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3180 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3181 PIPE_CONFIG(ADDR_SURF_P2) |
3182 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3183 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3184 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3185 PIPE_CONFIG(ADDR_SURF_P2) |
3186 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3187 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3188 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3189 PIPE_CONFIG(ADDR_SURF_P2) |
3190 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3191 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3192 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3193 PIPE_CONFIG(ADDR_SURF_P2) |
3194 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3195 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3196 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3197 PIPE_CONFIG(ADDR_SURF_P2) |
3198 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3199 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3200 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3201 PIPE_CONFIG(ADDR_SURF_P2) |
3202 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3203 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3204 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3205 PIPE_CONFIG(ADDR_SURF_P2));
3206 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3207 PIPE_CONFIG(ADDR_SURF_P2) |
3208 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3209 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3210 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3211 PIPE_CONFIG(ADDR_SURF_P2) |
3212 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3213 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3214 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3215 PIPE_CONFIG(ADDR_SURF_P2) |
3216 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3217 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3218 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3219 PIPE_CONFIG(ADDR_SURF_P2) |
3220 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3221 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3222 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3223 PIPE_CONFIG(ADDR_SURF_P2) |
3224 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3225 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3226 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3227 PIPE_CONFIG(ADDR_SURF_P2) |
3228 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3229 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3230 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3231 PIPE_CONFIG(ADDR_SURF_P2) |
3232 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3233 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3234 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3235 PIPE_CONFIG(ADDR_SURF_P2) |
3236 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3237 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3238 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3239 PIPE_CONFIG(ADDR_SURF_P2) |
3240 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3241 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3242 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3243 PIPE_CONFIG(ADDR_SURF_P2) |
3244 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3245 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3246 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3247 PIPE_CONFIG(ADDR_SURF_P2) |
3248 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3249 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3250 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3251 PIPE_CONFIG(ADDR_SURF_P2) |
3252 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3254 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3255 PIPE_CONFIG(ADDR_SURF_P2) |
3256 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3257 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3258 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3259 PIPE_CONFIG(ADDR_SURF_P2) |
3260 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3262 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3263 PIPE_CONFIG(ADDR_SURF_P2) |
3264 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3266 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3267 PIPE_CONFIG(ADDR_SURF_P2) |
3268 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3270 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3271 PIPE_CONFIG(ADDR_SURF_P2) |
3272 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3274 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3275 PIPE_CONFIG(ADDR_SURF_P2) |
3276 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3279 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3280 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3281 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3282 NUM_BANKS(ADDR_SURF_8_BANK));
3283 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3284 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3285 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3286 NUM_BANKS(ADDR_SURF_8_BANK));
3287 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3288 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3289 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3290 NUM_BANKS(ADDR_SURF_8_BANK));
3291 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3292 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3293 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3294 NUM_BANKS(ADDR_SURF_8_BANK));
3295 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3296 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3297 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3298 NUM_BANKS(ADDR_SURF_8_BANK));
3299 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3300 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3301 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3302 NUM_BANKS(ADDR_SURF_8_BANK));
3303 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3304 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3305 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3306 NUM_BANKS(ADDR_SURF_8_BANK));
3307 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3308 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3309 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3310 NUM_BANKS(ADDR_SURF_16_BANK));
3311 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3312 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3313 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3314 NUM_BANKS(ADDR_SURF_16_BANK));
3315 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3316 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3317 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3318 NUM_BANKS(ADDR_SURF_16_BANK));
3319 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3320 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3321 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3322 NUM_BANKS(ADDR_SURF_16_BANK));
3323 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3324 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3325 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3326 NUM_BANKS(ADDR_SURF_16_BANK));
3327 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3328 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3329 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3330 NUM_BANKS(ADDR_SURF_16_BANK));
3331 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3332 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3333 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3334 NUM_BANKS(ADDR_SURF_8_BANK));
3336 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3337 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3339 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3341 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3342 if (reg_offset != 7)
3343 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
/*
 * Program GRBM_GFX_INDEX so that subsequent GRBM-routed register accesses
 * target a specific shader engine / shader array / instance.  Passing
 * 0xffffffff for any of se_num, sh_num or instance selects the matching
 * *_BROADCAST_WRITES mode for that dimension instead of an index.
 * Callers must hold adev->grbm_idx_mutex (see gfx_v8_0_setup_rb below).
 */
3349 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3350 u32 se_num, u32 sh_num, u32 instance)
3354 if (instance == 0xffffffff)
3355 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3357 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3359 if (se_num == 0xffffffff)
3360 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3362 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3364 if (sh_num == 0xffffffff)
3365 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3367 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
/* Commit the accumulated index/broadcast selection in one write. */
3369 WREG32(mmGRBM_GFX_INDEX, data);
/*
 * Route subsequent per-queue CP register accesses to the given
 * micro-engine/pipe/queue/vmid via the SRBM selector.
 */
3372 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3373 u32 me, u32 pipe, u32 q, u32 vm)
3375 vi_srbm_select(adev, me, pipe, q, vm);
/*
 * Return a bitmap of the render backends (RBs) that are active for the
 * currently selected SE/SH (see gfx_v8_0_select_se_sh): OR the hard
 * (CC) and user (GC_USER) backend-disable registers, extract the
 * BACKEND_DISABLE field, then invert and mask down to the number of
 * backends per shader array.
 */
3378 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3382 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3383 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
/* Both registers share the field layout, so one extraction suffices. */
3385 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3387 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3388 adev->gfx.config.max_sh_per_se);
/* Disabled bits set -> invert to get the "active" bitmap. */
3390 return (~data) & mask;
/*
 * Fill in the golden PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 values
 * for the current ASIC.  The per-chip constants encode the RB-to-packer
 * and SE mapping for an unharvested part; harvested parts are corrected
 * afterwards by gfx_v8_0_write_harvested_raster_configs().
 * Unknown ASICs only log an error and leave *rconf/*rconf1 untouched.
 */
3394 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3396 switch (adev->asic_type) {
3399 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3400 RB_XSEL2(1) | PKR_MAP(2) |
3401 PKR_XSEL(1) | PKR_YSEL(1) |
3402 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3403 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3407 case CHIP_POLARIS10:
3408 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3409 SE_XSEL(1) | SE_YSEL(1);
3410 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3415 *rconf |= RB_MAP_PKR0(2);
3418 case CHIP_POLARIS11:
3419 case CHIP_POLARIS12:
3420 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3421 SE_XSEL(1) | SE_YSEL(1);
3429 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
/*
 * Write per-SE PA_SC_RASTER_CONFIG(_1) values adjusted for harvested
 * (disabled) render backends.  For each shader engine the golden
 * raster_config is patched so that SE/PKR/RB map fields point away from
 * fully-harvested partners, based on rb_mask (bitmap of live RBs) and
 * num_rb.  raster_config_1 is patched once for the SE-pair mapping on
 * parts with more than two SEs.
 */
3435 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3436 u32 raster_config, u32 raster_config_1,
3437 unsigned rb_mask, unsigned num_rb)
3439 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3440 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3441 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3442 unsigned rb_per_se = num_rb / num_se;
3443 unsigned se_mask[4];
/* Per-SE slices of the global RB mask (up to 4 SEs on VI). */
3446 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3447 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3448 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3449 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
/* Sanity-check the topology assumptions this mapping code relies on. */
3451 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3452 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3453 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
/* If a whole SE pair is harvested, repoint SE_PAIR_MAP at the live pair. */
3455 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3456 (!se_mask[2] && !se_mask[3]))) {
3457 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3459 if (!se_mask[0] && !se_mask[1]) {
3461 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3464 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3468 for (se = 0; se < num_se; se++) {
3469 unsigned raster_config_se = raster_config;
3470 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3471 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3472 int idx = (se / 2) * 2;
/* If one SE of this pair is dead, map SE work onto the live one. */
3474 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3475 raster_config_se &= ~SE_MAP_MASK;
3477 if (!se_mask[idx]) {
3478 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3480 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
/* Same correction at packer granularity within this SE. */
3484 pkr0_mask &= rb_mask;
3485 pkr1_mask &= rb_mask;
3486 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3487 raster_config_se &= ~PKR_MAP_MASK;
3490 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3492 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
/* And finally at individual-RB granularity inside each packer. */
3496 if (rb_per_se >= 2) {
3497 unsigned rb0_mask = 1 << (se * rb_per_se);
3498 unsigned rb1_mask = rb0_mask << 1;
3500 rb0_mask &= rb_mask;
3501 rb1_mask &= rb_mask;
3502 if (!rb0_mask || !rb1_mask) {
3503 raster_config_se &= ~RB_MAP_PKR0_MASK;
3507 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3510 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3514 if (rb_per_se > 2) {
3515 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3516 rb1_mask = rb0_mask << 1;
3517 rb0_mask &= rb_mask;
3518 rb1_mask &= rb_mask;
3519 if (!rb0_mask || !rb1_mask) {
3520 raster_config_se &= ~RB_MAP_PKR1_MASK;
3524 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3527 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3533 /* GRBM_GFX_INDEX has a different offset on VI */
3534 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3535 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3536 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3539 /* GRBM_GFX_INDEX has a different offset on VI */
/* Restore broadcast mode so later writes hit all SEs again. */
3540 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
/*
 * Discover which render backends are active, program the raster config
 * (using the harvested path when some RBs are disabled), and cache the
 * per-SE/SH raster settings for userspace queries.
 * Holds grbm_idx_mutex across all GRBM_GFX_INDEX manipulation.
 */
3543 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3547 u32 raster_config = 0, raster_config_1 = 0;
3549 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3550 adev->gfx.config.max_sh_per_se;
3551 unsigned num_rb_pipes;
3553 mutex_lock(&adev->grbm_idx_mutex);
/* Build the global active-RB bitmap, one SE/SH slice at a time. */
3554 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3555 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3556 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3557 data = gfx_v8_0_get_rb_active_bitmap(adev);
3558 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3559 rb_bitmap_width_per_sh);
3562 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3564 adev->gfx.config.backend_enable_mask = active_rbs;
3565 adev->gfx.config.num_rbs = hweight32(active_rbs);
3567 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3568 adev->gfx.config.max_shader_engines, 16);
3570 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
/* All RBs present (or none enabled): broadcast the golden values;
 * otherwise fall through to the per-SE harvested fixup path. */
3572 if (!adev->gfx.config.backend_enable_mask ||
3573 adev->gfx.config.num_rbs >= num_rb_pipes) {
3574 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3575 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3577 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3578 adev->gfx.config.backend_enable_mask,
3582 /* cache the values for userspace */
3583 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3584 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3585 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3586 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3587 RREG32(mmCC_RB_BACKEND_DISABLE);
3588 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3589 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3590 adev->gfx.config.rb_config[i][j].raster_config =
3591 RREG32(mmPA_SC_RASTER_CONFIG);
3592 adev->gfx.config.rb_config[i][j].raster_config_1 =
3593 RREG32(mmPA_SC_RASTER_CONFIG_1);
3596 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3597 mutex_unlock(&adev->grbm_idx_mutex);
3601 * gfx_v8_0_init_compute_vmid - initialize compute VMID SH_MEM registers
3603 * @adev: amdgpu_device pointer
3605 * Initialize compute vmid sh_mem registers
3608 #define DEFAULT_SH_MEM_BASES (0x6000)
3609 #define FIRST_COMPUTE_VMID (8)
3610 #define LAST_COMPUTE_VMID (16)
3611 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3614 uint32_t sh_mem_config;
3615 uint32_t sh_mem_bases;
3618 * Configure apertures:
3619 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3620 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3621 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3623 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3625 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3626 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3627 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3628 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3629 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3630 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
/* Program the same aperture config into every compute VMID (8..15). */
3632 mutex_lock(&adev->srbm_mutex);
3633 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3634 vi_srbm_select(adev, 0, 0, 0, i);
3635 /* CP and shaders */
3636 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3637 WREG32(mmSH_MEM_APE1_BASE, 1);
3638 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3639 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3641 vi_srbm_select(adev, 0, 0, 0, 0);
3642 mutex_unlock(&adev->srbm_mutex);
3644 /* Initialize all compute VMIDs to have no GDS, GWS, or OA
3645 access. These should be enabled by FW for target VMIDs. */
3646 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3647 WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3648 WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3649 WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3650 WREG32(amdgpu_gds_reg_offset[i].oa, 0);
/*
 * Clear the GDS/GWS/OA allocations for all non-zero VMIDs so no stale
 * entries survive across resets; see the comment below for why VMID0
 * is left alone.
 */
3654 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3659 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3660 * access. Compute VMIDs should be enabled by FW for target VMIDs,
3661 * the driver can enable them for graphics. VMID0 should maintain
3662 * access so that HWS firmware can save/restore entries.
3664 for (vmid = 1; vmid < 16; vmid++) {
3665 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3666 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3667 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3668 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
/*
 * Per-ASIC software config: enable double offchip LDS buffers on the
 * chips matched by the first case group, disable on everything else.
 */
3672 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3674 switch (adev->asic_type) {
3676 adev->gfx.config.double_offchip_lds_buf = 1;
3680 adev->gfx.config.double_offchip_lds_buf = 0;
/*
 * One-time GFX "golden constants" programming at hw init: address
 * config, tiling tables, RB setup, CU info, per-VMID SH_MEM apertures,
 * PA_SC FIFO sizes and SPI arbiter priorities.
 */
3685 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3687 u32 tmp, sh_static_mem_cfg;
3690 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3691 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3692 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3693 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3695 gfx_v8_0_tiling_mode_table_init(adev);
3696 gfx_v8_0_setup_rb(adev);
3697 gfx_v8_0_get_cu_info(adev);
3698 gfx_v8_0_config_init(adev);
3700 /* XXX SH_MEM regs */
3701 /* where to put LDS, scratch, GPUVM in FSA64 space */
3702 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3704 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3706 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3708 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
/* Program SH_MEM_CONFIG/BASES for every VMID the VM manager exposes.
 * The two REG_SET_FIELD sequences below build the VMID0 (UC) and
 * non-zero-VMID (NC default mtype) variants respectively. */
3710 mutex_lock(&adev->srbm_mutex);
3711 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3712 vi_srbm_select(adev, 0, 0, 0, i);
3713 /* CP and shaders */
3715 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3716 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3717 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3718 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3719 WREG32(mmSH_MEM_CONFIG, tmp);
3720 WREG32(mmSH_MEM_BASES, 0);
3722 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3723 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3724 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3725 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3726 WREG32(mmSH_MEM_CONFIG, tmp);
3727 tmp = adev->gmc.shared_aperture_start >> 48;
3728 WREG32(mmSH_MEM_BASES, tmp);
3731 WREG32(mmSH_MEM_APE1_BASE, 1);
3732 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3734 vi_srbm_select(adev, 0, 0, 0, 0);
3735 mutex_unlock(&adev->srbm_mutex);
3737 gfx_v8_0_init_compute_vmid(adev);
3738 gfx_v8_0_init_gds_vmid(adev);
3740 mutex_lock(&adev->grbm_idx_mutex);
3742 * making sure that the following register writes will be broadcasted
3743 * to all the shaders
3745 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3747 WREG32(mmPA_SC_FIFO_SIZE,
3748 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3749 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3750 (adev->gfx.config.sc_prim_fifo_size_backend <<
3751 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3752 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3753 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3754 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3755 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
/* Give all four pipe-order timestamps equal (2) arbitration priority. */
3757 tmp = RREG32(mmSPI_ARB_PRIORITY);
3758 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3759 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3760 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3761 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3762 WREG32(mmSPI_ARB_PRIORITY, tmp);
3764 mutex_unlock(&adev->grbm_idx_mutex);
/*
 * Poll until the RLC serdes masters are idle: first the per-CU masters
 * for every SE/SH (with usec_timeout per slice, logging on timeout),
 * then the non-CU masters (SE/GC/TC0/TC1) globally.
 */
3768 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3773 mutex_lock(&adev->grbm_idx_mutex);
3774 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3775 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3776 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3777 for (k = 0; k < adev->usec_timeout; k++) {
3778 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
/* On timeout restore broadcast + drop the lock before logging. */
3782 if (k == adev->usec_timeout) {
3783 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3784 0xffffffff, 0xffffffff);
3785 mutex_unlock(&adev->grbm_idx_mutex);
3786 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3792 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3793 mutex_unlock(&adev->grbm_idx_mutex);
3795 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3796 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3797 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3798 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3799 for (k = 0; k < adev->usec_timeout; k++) {
3800 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/*
 * Enable/disable the GFX idle-related interrupt sources (context busy,
 * context empty, CMP busy, GFX idle) on CP ring 0 in one RMW.
 */
3806 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3809 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3811 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3812 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3813 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3814 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3816 WREG32(mmCP_INT_CNTL_RING0, tmp);
/*
 * Fill the clear-state buffer via the RLC callback, then point the RLC
 * CSIB registers at its GPU address and size.  Low address bits are
 * masked to the required 4-byte alignment.
 */
3819 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3821 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3823 WREG32(mmRLC_CSIB_ADDR_HI,
3824 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3825 WREG32(mmRLC_CSIB_ADDR_LO,
3826 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3827 WREG32(mmRLC_CSIB_LENGTH,
3828 adev->gfx.rlc.clear_state_size);
/*
 * Walk the RLC indirect register-list blob: record where each indirect
 * entry group starts (ind_start_offsets/offset_count), collect the set
 * of distinct index registers (unique_indices/indices_count), and
 * rewrite each entry in-place to reference its unique-index slot.
 * BUG_ON guards the fixed-size output arrays.
 */
3831 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3834 int *unique_indices,
3837 int *ind_start_offsets,
3842 bool new_entry = true;
3844 for (; ind_offset < list_size; ind_offset++) {
3848 ind_start_offsets[*offset_count] = ind_offset;
3849 *offset_count = *offset_count + 1;
3850 BUG_ON(*offset_count >= max_offset);
/* 0xFFFFFFFF terminates the current group in the blob format. */
3853 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3860 /* look for the matching indice */
3862 indices < *indices_count;
3864 if (unique_indices[indices] ==
3865 register_list_format[ind_offset])
/* Not seen before: append to the unique-index table. */
3869 if (indices >= *indices_count) {
3870 unique_indices[*indices_count] =
3871 register_list_format[ind_offset];
3872 indices = *indices_count;
3873 *indices_count = *indices_count + 1;
3874 BUG_ON(*indices_count >= max_indices);
3877 register_list_format[ind_offset] = indices;
/*
 * Upload the RLC save/restore machine state: the register restore list
 * into ARAM, the (parsed) register list format and starting offsets
 * into GPM scratch, and the unique index registers into the
 * RLC_SRM_INDEX_CNTL_* register pairs.  Works on a kmemdup'd copy of
 * the firmware-provided format list since parsing rewrites it in place.
 */
3881 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3884 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3885 int indices_count = 0;
3886 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3887 int offset_count = 0;
3890 unsigned int *register_list_format =
3891 kmemdup(adev->gfx.rlc.register_list_format,
3892 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3893 if (!register_list_format)
3896 gfx_v8_0_parse_ind_reg_list(register_list_format,
3897 RLC_FormatDirectRegListLength,
3898 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3901 ARRAY_SIZE(unique_indices),
3902 indirect_start_offsets,
3904 ARRAY_SIZE(indirect_start_offsets));
3906 /* save and restore list */
3907 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3909 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3910 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3911 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3914 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3915 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3916 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
/* Restore-list size is stored in dword-pairs, hence the extra >> 1. */
3918 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3919 list_size = list_size >> 1;
3920 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3921 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3923 /* starting offsets starts */
3924 WREG32(mmRLC_GPM_SCRATCH_ADDR,
3925 adev->gfx.rlc.starting_offsets_start);
3926 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3927 WREG32(mmRLC_GPM_SCRATCH_DATA,
3928 indirect_start_offsets[i]);
3930 /* unique indices */
3931 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3932 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3933 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3934 if (unique_indices[i] != 0) {
/* Low 18 bits -> ADDR register, upper bits -> DATA register. */
3935 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3936 WREG32(data + i, unique_indices[i] >> 20);
3939 kfree(register_list_format);
/* Turn on the RLC save/restore machine (SRM). */
3944 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3946 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
/*
 * Program the RLC power-gating timing parameters: WPTR poll idle count,
 * the four PG delay fields, serdes command delay and the GFX-idle
 * threshold for GRBM register save.
 */
3949 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3953 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3955 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3956 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3957 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3958 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3959 WREG32(mmRLC_PG_DELAY, data);
3961 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3962 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
/* Toggle SMU clock slow-down during power-up sequences. */
3966 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3969 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
/* Toggle SMU clock slow-down during power-down sequences. */
3972 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3975 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
/* Enable CP power gating; note the field is a *disable* bit, hence the
 * inverted enable ? 0 : 1. */
3978 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3980 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
/*
 * Power-gating init, gated per ASIC family: Carrizo/Stoney additionally
 * program the RLC jump table and always-on CU mask; Polaris11/12 and
 * VegaM get the CSB + save/restore + PG-delay setup only.
 */
3983 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3985 if ((adev->asic_type == CHIP_CARRIZO) ||
3986 (adev->asic_type == CHIP_STONEY)) {
3987 gfx_v8_0_init_csb(adev);
3988 gfx_v8_0_init_save_restore_list(adev);
3989 gfx_v8_0_enable_save_restore_machine(adev);
3990 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3991 gfx_v8_0_init_power_gating(adev);
3992 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3993 } else if ((adev->asic_type == CHIP_POLARIS11) ||
3994 (adev->asic_type == CHIP_POLARIS12) ||
3995 (adev->asic_type == CHIP_VEGAM)) {
3996 gfx_v8_0_init_csb(adev);
3997 gfx_v8_0_init_save_restore_list(adev);
3998 gfx_v8_0_enable_save_restore_machine(adev);
3999 gfx_v8_0_init_power_gating(adev);
/* Halt the RLC F32 core, mask GUI-idle interrupts and wait for the
 * serdes masters to drain. */
4006 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4006 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4008 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4009 gfx_v8_0_wait_for_rlc_serdes(adev);
/* Pulse the GRBM soft-reset bit for the RLC (assert then deassert). */
4012 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4014 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4017 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
/* Re-enable the RLC F32 core; on dGPUs also re-arm the GUI-idle
 * interrupts (APUs do this only after the CP is initialized). */
4021 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4023 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4025 /* carrizo do enable cp interrupt after cp inited */
4026 if (!(adev->flags & AMD_IS_APU))
4027 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/*
 * Bring the RLC back up: under SR-IOV only the CSB needs (re)init;
 * bare metal does a full stop/reset, PG setup and restart through the
 * rlc.funcs callbacks.
 */
4032 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4034 if (amdgpu_sriov_vf(adev)) {
4035 gfx_v8_0_init_csb(adev);
4039 adev->gfx.rlc.funcs->stop(adev);
4040 adev->gfx.rlc.funcs->reset(adev);
4041 gfx_v8_0_init_pg(adev);
4042 adev->gfx.rlc.funcs->start(adev);
/*
 * Release (enable) or halt (disable) the three GFX CP micro-engines
 * (ME/PFP/CE).  When halting, also mark every GFX ring's scheduler as
 * not ready so no new jobs are pushed.
 */
4047 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4050 u32 tmp = RREG32(mmCP_ME_CNTL);
4053 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4054 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4055 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4057 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4058 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4059 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4060 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4061 adev->gfx.gfx_ring[i].sched.ready = false;
4063 WREG32(mmCP_ME_CNTL, tmp);
/*
 * Compute the dword count of the clear-state PM4 stream that
 * gfx_v8_0_cp_gfx_start() emits: preamble/context-control packets,
 * every SECT_CONTEXT extent from vi_cs_data (2 header dwords plus
 * reg_count payload each), the raster-config pair and the end markers.
 * Must be kept in sync with the emission code below.
 */
4067 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4070 const struct cs_section_def *sect = NULL;
4071 const struct cs_extent_def *ext = NULL;
4073 /* begin clear state */
4075 /* context control state */
4078 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4079 for (ext = sect->section; ext->extent != NULL; ++ext) {
4080 if (sect->id == SECT_CONTEXT)
4081 count += 2 + ext->reg_count;
4086 /* pa_sc_raster_config/pa_sc_raster_config1 */
4088 /* end clear state */
/*
 * Kick off the GFX CP: program basic CP registers, un-halt the engines,
 * then emit the clear-state PM4 preamble (context control, vi_cs_data
 * context registers, raster config, clear-state) and the CE partition
 * bases on ring 0.  Returns the amdgpu_ring_alloc() error on failure.
 */
4096 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4098 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4099 const struct cs_section_def *sect = NULL;
4100 const struct cs_extent_def *ext = NULL;
4104 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4105 WREG32(mmCP_ENDIAN_SWAP, 0);
4106 WREG32(mmCP_DEVICE_ID, 1);
4108 gfx_v8_0_cp_gfx_enable(adev, true);
/* +4 dwords of slack over the computed clear-state size. */
4110 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4112 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4116 /* clear state buffer */
4117 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4118 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4120 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4121 amdgpu_ring_write(ring, 0x80000000);
4122 amdgpu_ring_write(ring, 0x80000000);
/* Emit every context-register extent from the golden CS data. */
4124 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4125 for (ext = sect->section; ext->extent != NULL; ++ext) {
4126 if (sect->id == SECT_CONTEXT) {
4127 amdgpu_ring_write(ring,
4128 PACKET3(PACKET3_SET_CONTEXT_REG,
4130 amdgpu_ring_write(ring,
4131 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4132 for (i = 0; i < ext->reg_count; i++)
4133 amdgpu_ring_write(ring, ext->extent[i]);
4138 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4139 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4140 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4141 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4143 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4144 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4146 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4147 amdgpu_ring_write(ring, 0);
4149 /* init the CE partitions */
4150 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4151 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4152 amdgpu_ring_write(ring, 0x8000);
4153 amdgpu_ring_write(ring, 0x8000);
4155 amdgpu_ring_commit(ring);
/*
 * Configure the CP graphics ring doorbell: offset/enable when the ring
 * uses doorbells, disabled otherwise; on dGPUs also program the valid
 * doorbell range.  Topaz has no GFX doorbells at all.
 */
4159 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4162 /* no gfx doorbells on iceland */
4163 if (adev->asic_type == CHIP_TOPAZ)
4166 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4168 if (ring->use_doorbell) {
4169 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4170 DOORBELL_OFFSET, ring->doorbell_index);
4171 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4173 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4176 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4179 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
/* APUs skip the range programming below. */
4181 if (adev->flags & AMD_IS_APU)
4184 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4185 DOORBELL_RANGE_LOWER,
4186 adev->doorbell_index.gfx_ring0);
4187 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4189 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4190 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
/*
 * Program CP ring-buffer 0 (size, rptr/wptr writeback addresses, base
 * address, doorbell) and start it via gfx_v8_0_cp_gfx_start(), then
 * mark the ring's scheduler ready.
 */
4193 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4195 struct amdgpu_ring *ring;
4198 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4200 /* Set the write pointer delay */
4201 WREG32(mmCP_RB_WPTR_DELAY, 0);
4203 /* set the RB to use vmid 0 */
4204 WREG32(mmCP_RB_VMID, 0);
4206 /* Set ring buffer size */
4207 ring = &adev->gfx.gfx_ring[0];
/* RB_BUFSZ is log2 of the size in 8-byte units. */
4208 rb_bufsz = order_base_2(ring->ring_size / 8);
4209 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4210 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4211 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4212 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4214 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4216 WREG32(mmCP_RB0_CNTL, tmp);
4218 /* Initialize the ring buffer's read and write pointers */
4219 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4221 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4223 /* set the wb address wether it's enabled or not */
4224 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4225 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4226 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4228 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4229 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4230 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
/* Re-write CNTL without RB_RPTR_WR_ENA to latch the pointer init. */
4232 WREG32(mmCP_RB0_CNTL, tmp);
4234 rb_addr = ring->gpu_addr >> 8;
4235 WREG32(mmCP_RB0_BASE, rb_addr);
4236 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4238 gfx_v8_0_set_cpg_door_bell(adev, ring);
4239 /* start the ring */
4240 amdgpu_ring_clear_ring(ring);
4241 gfx_v8_0_cp_gfx_start(adev);
4242 ring->sched.ready = true;
/*
 * Release or halt both compute micro-engines (MEC ME1/ME2).  When
 * halting, also mark all compute rings and the KIQ ring not ready.
 */
4247 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4252 WREG32(mmCP_MEC_CNTL, 0);
4254 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4255 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4256 adev->gfx.compute_ring[i].sched.ready = false;
4257 adev->gfx.kiq.ring.sched.ready = false;
/*
 * Tell the RLC which me/pipe/queue is the KIQ by encoding them into
 * RLC_CP_SCHEDULERS.  Written twice: first the queue id, then (per the
 * programming sequence) a second commit of the register.
 */
4263 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4266 struct amdgpu_device *adev = ring->adev;
4268 /* tell RLC which is KIQ queue */
4269 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4271 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4272 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4274 WREG32(mmRLC_CP_SCHEDULERS, tmp);
/*
 * Use the KIQ to map all kernel compute queues (KCQs): build the queue
 * bitmap from the MEC queue_bitmap, then submit one SET_RESOURCES
 * packet plus one MAP_QUEUES packet per compute ring.  Returns the
 * amdgpu_ring_alloc() error on failure.
 */
4277 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4279 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4280 uint64_t queue_mask = 0;
4283 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4284 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4287 /* This situation may be hit in the future if a new HW
4288 * generation exposes more than 64 queues. If so, the
4289 * definition of queue_mask needs updating */
4290 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4291 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4295 queue_mask |= (1ull << i);
/* 8 dwords of SET_RESOURCES + 8 dwords of MAP_QUEUES per ring. */
4298 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4300 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4304 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4305 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4306 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4307 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4308 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4309 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4310 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4311 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4312 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4313 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4314 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4315 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4318 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4319 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4320 amdgpu_ring_write(kiq_ring,
4321 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4322 amdgpu_ring_write(kiq_ring,
4323 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4324 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4325 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4326 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4327 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4328 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4329 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4330 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4333 amdgpu_ring_commit(kiq_ring);
/*
 * Deactivate the currently-selected HQD: if it is active, issue a
 * dequeue request (req) and poll up to usec_timeout for ACTIVE to
 * clear, then reset the dequeue request and PQ read/write pointers.
 * Caller must have selected the target queue via SRBM.
 */
4338 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4342 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4343 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4344 for (i = 0; i < adev->usec_timeout; i++) {
4345 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4349 if (i == adev->usec_timeout)
4352 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4353 WREG32(mmCP_HQD_PQ_RPTR, 0);
4354 WREG32(mmCP_HQD_PQ_WPTR, 0);
/*
 * Set the MQD pipe/queue priority fields for compute rings: queues the
 * core marks as high-priority get HIGH pipe priority and MAXIMUM queue
 * priority; others leave has_high_prio false.
 */
4359 static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4361 struct amdgpu_device *adev = ring->adev;
4363 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4364 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
4365 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4366 ring->has_high_prio = true;
4367 mqd->cp_hqd_queue_priority =
4368 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4370 ring->has_high_prio = false;
/*
 * gfx_v8_0_mqd_init - fill in the Memory Queue Descriptor (MQD) for a
 * KIQ or compute ring: header, CU masks, EOP buffer address/size,
 * doorbell control, MQD/HQD base addresses, PQ control (queue size,
 * rptr block size, privileged state), rptr report and wptr poll
 * addresses, static priority, and saved context registers.
 * Caller must have selected the queue via vi_srbm_select() under
 * srbm_mutex, since several fields are seeded from current HQD registers.
 */
4375 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4377 struct amdgpu_device *adev = ring->adev;
4378 struct vi_mqd *mqd = ring->mqd_ptr;
4379 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4382 mqd->header = 0xC0310800;
4383 mqd->compute_pipelinestat_enable = 0x00000001;
4384 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4385 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4386 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4387 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4388 mqd->compute_misc_reserved = 0x00000003;
/* dynamic CU mask lives in the same BO, right after the MQD proper */
4389 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4390 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4391 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4392 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
/* EOP base address is programmed in units of 256 bytes (>> 8) */
4393 eop_base_addr = ring->eop_gpu_addr >> 8;
4394 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4395 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4397 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4398 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4399 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4400 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4402 mqd->cp_hqd_eop_control = tmp;
4404 /* enable doorbell? */
4405 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4406 CP_HQD_PQ_DOORBELL_CONTROL,
4408 ring->use_doorbell ? 1 : 0);
4410 mqd->cp_hqd_pq_doorbell_control = tmp;
4412 /* set the pointer to the MQD */
4413 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4414 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4416 /* set MQD vmid to 0 */
4417 tmp = RREG32(mmCP_MQD_CONTROL);
4418 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4419 mqd->cp_mqd_control = tmp;
4421 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4422 hqd_gpu_addr = ring->gpu_addr >> 8;
4423 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4424 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4426 /* set up the HQD, this is similar to CP_RB0_CNTL */
4427 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4428 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4429 (order_base_2(ring->ring_size / 4) - 1));
4430 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4431 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
/* ENDIAN_SWAP=1 presumably only on big-endian builds — elided #ifdef, confirm against full source */
4433 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4435 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4436 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4437 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4438 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4439 mqd->cp_hqd_pq_control = tmp;
4441 /* set the wb address whether it's enabled or not */
4442 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4443 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4444 mqd->cp_hqd_pq_rptr_report_addr_hi =
4445 upper_32_bits(wb_gpu_addr) & 0xffff;
4447 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4448 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4449 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4450 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4453 /* enable the doorbell if requested */
4454 if (ring->use_doorbell) {
4455 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4456 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4457 DOORBELL_OFFSET, ring->doorbell_index);
4459 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4461 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4462 DOORBELL_SOURCE, 0);
4463 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4467 mqd->cp_hqd_pq_doorbell_control = tmp;
4469 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4471 mqd->cp_hqd_pq_wptr = ring->wptr;
4472 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4474 /* set the vmid for the queue */
4475 mqd->cp_hqd_vmid = 0;
4477 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4478 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4479 mqd->cp_hqd_persistent_state = tmp;
/* MTYPE=3 on IB control / IQ timer / ctx-save control below */
4482 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4483 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4484 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4485 mqd->cp_hqd_ib_control = tmp;
4487 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4488 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4489 mqd->cp_hqd_iq_timer = tmp;
4491 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4492 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4493 mqd->cp_hqd_ctx_save_control = tmp;
/* snapshot remaining HQD state registers into the MQD */
4496 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4497 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4498 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4499 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4500 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4501 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4502 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4503 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4504 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4505 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4506 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4507 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4509 /* set static priority for a queue/ring */
4510 gfx_v8_0_mqd_set_priority(ring, mqd);
4511 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4513 /* map_queues packet doesn't need activate the queue,
4514 * so only kiq need set this field.
4516 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4517 mqd->cp_hqd_active = 1;
/*
 * gfx_v8_0_mqd_commit - write a prepared MQD to the hardware HQD
 * registers of the currently selected queue, in three register ranges,
 * and finally activate the HQD.  mqd_data is indexed relative to
 * mmCP_MQD_BASE_ADDR so mqd fields map 1:1 onto the register block.
 * Caller must have selected the queue via vi_srbm_select() under
 * srbm_mutex (see gfx_v8_0_kiq_init_queue).
 */
4522 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4528 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4529 mqd_data = &mqd->cp_mqd_base_addr_lo;
4531 /* disable wptr polling */
4532 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4534 /* program all HQD registers */
4535 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4536 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4538 /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4539 * This is safe since EOP RPTR==WPTR for any inactive HQD
4540 * on ASICs that do not support context-save.
4541 * EOP writes/reads can start anywhere in the ring.
4543 if (adev->asic_type != CHIP_TONGA) {
4544 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4545 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4546 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4549 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4550 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4552 /* activate the HQD */
4553 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4554 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
/*
 * gfx_v8_0_kiq_init_queue - initialize (or restore after GPU reset) the
 * KIQ ring's MQD and commit it to hardware.  On GPU reset the MQD is
 * restored from the backup at index AMDGPU_MAX_COMPUTE_RINGS and the
 * ring buffer cleared; otherwise a fresh MQD is built via
 * gfx_v8_0_mqd_init() and backed up for future resets.  All HQD register
 * access is done with the KIQ queue selected under srbm_mutex.
 */
4559 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4561 struct amdgpu_device *adev = ring->adev;
4562 struct vi_mqd *mqd = ring->mqd_ptr;
/* KIQ backup slot is the one past the last compute ring */
4563 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4565 gfx_v8_0_kiq_setting(ring);
4567 if (adev->in_gpu_reset) { /* for GPU_RESET case */
4568 /* reset MQD to a clean status */
4569 if (adev->gfx.mec.mqd_backup[mqd_idx])
4570 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4572 /* reset ring buffer */
4574 amdgpu_ring_clear_ring(ring);
4575 mutex_lock(&adev->srbm_mutex);
4576 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4577 gfx_v8_0_mqd_commit(adev, mqd);
4578 vi_srbm_select(adev, 0, 0, 0, 0);
4579 mutex_unlock(&adev->srbm_mutex);
/* else branch (first init — elided "} else {" in this listing) */
4581 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4582 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4583 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4584 mutex_lock(&adev->srbm_mutex);
4585 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4586 gfx_v8_0_mqd_init(ring);
4587 gfx_v8_0_mqd_commit(adev, mqd);
4588 vi_srbm_select(adev, 0, 0, 0, 0);
4589 mutex_unlock(&adev->srbm_mutex);
4591 if (adev->gfx.mec.mqd_backup[mqd_idx])
4592 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
/*
 * gfx_v8_0_kcq_init_queue - initialize a compute (KCQ) ring's MQD.
 * Unlike the KIQ path, the MQD is only built and backed up here; the
 * actual mapping to hardware is done later via the KIQ MAP_QUEUES
 * packets (gfx_v8_0_kiq_kcq_enable).  On GPU reset the MQD is restored
 * from backup; on suspend/resume only the ring buffer is cleared.
 */
4598 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4600 struct amdgpu_device *adev = ring->adev;
4601 struct vi_mqd *mqd = ring->mqd_ptr;
/* backup index = position of this ring in the compute_ring array */
4602 int mqd_idx = ring - &adev->gfx.compute_ring[0];
4604 if (!adev->in_gpu_reset && !adev->in_suspend) {
4605 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4606 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4607 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4608 mutex_lock(&adev->srbm_mutex);
4609 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4610 gfx_v8_0_mqd_init(ring);
4611 vi_srbm_select(adev, 0, 0, 0, 0);
4612 mutex_unlock(&adev->srbm_mutex);
4614 if (adev->gfx.mec.mqd_backup[mqd_idx])
4615 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4616 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4617 /* reset MQD to a clean status */
4618 if (adev->gfx.mec.mqd_backup[mqd_idx])
4619 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4620 /* reset ring buffer */
4622 amdgpu_ring_clear_ring(ring);
/* second clear below belongs to the (elided) "} else {" suspend/resume branch */
4624 amdgpu_ring_clear_ring(ring);
/*
 * gfx_v8_0_set_mec_doorbell_range - program the MEC doorbell aperture
 * (KIQ index through mec_ring7 index, in dword units via << 2) on ASICs
 * newer than Tonga, then globally enable CP doorbells.
 */
4629 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4631 if (adev->asic_type > CHIP_TONGA) {
4632 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4633 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4635 /* enable doorbells */
4636 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
/*
 * gfx_v8_0_kiq_resume - map the KIQ ring's MQD BO into the CPU address
 * space, initialize/commit its queue, then unmap and mark the ring's
 * scheduler ready.  Error paths for reserve/kmap failures are elided in
 * this listing.
 */
4639 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4641 struct amdgpu_ring *ring;
4644 ring = &adev->gfx.kiq.ring;
4646 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4647 if (unlikely(r != 0))
4650 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4651 if (unlikely(r != 0))
4654 gfx_v8_0_kiq_init_queue(ring);
4655 amdgpu_bo_kunmap(ring->mqd_obj);
4656 ring->mqd_ptr = NULL;
4657 amdgpu_bo_unreserve(ring->mqd_obj);
4658 ring->sched.ready = true;
/*
 * gfx_v8_0_kcq_resume - enable the compute CP, build the MQD for every
 * compute ring (kmap, init, kunmap per ring), program the MEC doorbell
 * range, and finally map all KCQs to hardware through the KIQ
 * (gfx_v8_0_kiq_kcq_enable).  Per-ring error handling and the final
 * return are elided in this listing.
 */
4662 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4664 struct amdgpu_ring *ring = NULL;
4667 gfx_v8_0_cp_compute_enable(adev, true);
4669 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4670 ring = &adev->gfx.compute_ring[i];
4672 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4673 if (unlikely(r != 0))
4675 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4677 r = gfx_v8_0_kcq_init_queue(ring);
4678 amdgpu_bo_kunmap(ring->mqd_obj);
4679 ring->mqd_ptr = NULL;
4681 amdgpu_bo_unreserve(ring->mqd_obj);
4686 gfx_v8_0_set_mec_doorbell_range(adev);
4688 r = gfx_v8_0_kiq_kcq_enable(adev);
/*
 * gfx_v8_0_cp_test_all_rings - ring-test the GFX ring, the KIQ ring and
 * every compute ring after CP bring-up.  GFX/KIQ failures are captured
 * in r (early-return lines elided in this listing); the compute-ring
 * test result is deliberately not captured here — presumably a compute
 * ring failure is non-fatal, TODO confirm against the full source.
 */
4696 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4699 struct amdgpu_ring *ring;
4701 /* collect all the ring_tests here, gfx, kiq, compute */
4702 ring = &adev->gfx.gfx_ring[0];
4703 r = amdgpu_ring_test_helper(ring);
4707 ring = &adev->gfx.kiq.ring;
4708 r = amdgpu_ring_test_helper(ring);
4712 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4713 ring = &adev->gfx.compute_ring[i];
4714 amdgpu_ring_test_helper(ring);
/*
 * gfx_v8_0_cp_resume - full CP bring-up sequence: mask GUI idle
 * interrupts (dGPU only), resume KIQ, then GFX, then KCQs, run ring
 * tests on everything, and re-enable GUI idle interrupts.  Error
 * early-returns between steps are elided in this listing.
 */
4720 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4724 if (!(adev->flags & AMD_IS_APU))
4725 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4727 r = gfx_v8_0_kiq_resume(adev);
4731 r = gfx_v8_0_cp_gfx_resume(adev);
4735 r = gfx_v8_0_kcq_resume(adev);
4739 r = gfx_v8_0_cp_test_all_rings(adev);
4743 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/* gfx_v8_0_cp_enable - enable/disable both the GFX and compute CP engines */
4748 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4750 gfx_v8_0_cp_gfx_enable(adev, enable);
4751 gfx_v8_0_cp_compute_enable(adev, enable);
/*
 * gfx_v8_0_hw_init - IP-block hw_init hook: program golden registers and
 * GFX constants, resume the RLC microcontroller, then resume the CP.
 */
4754 static int gfx_v8_0_hw_init(void *handle)
4757 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4759 gfx_v8_0_init_golden_registers(adev);
4760 gfx_v8_0_constants_init(adev);
4762 r = adev->gfx.rlc.funcs->resume(adev);
4766 r = gfx_v8_0_cp_resume(adev);
/*
 * gfx_v8_0_kcq_disable - unmap all compute queues via the KIQ by
 * emitting one 6-dword UNMAP_QUEUES packet per ring (ACTION=1 =
 * RESET_QUEUES, selected by doorbell offset), then ring-test the KIQ to
 * make sure the packets were consumed.
 */
4771 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4774 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4776 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4778 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4780 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4781 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4783 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4784 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4785 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4786 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4787 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4788 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4789 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4790 amdgpu_ring_write(kiq_ring, 0);
4791 amdgpu_ring_write(kiq_ring, 0);
4792 amdgpu_ring_write(kiq_ring, 0);
4794 r = amdgpu_ring_test_helper(kiq_ring);
4796 DRM_ERROR("KCQ disable failed\n");
/*
 * gfx_v8_0_is_idle - GFX block is idle when GRBM_STATUS.GUI_ACTIVE is
 * clear and GRBM_STATUS2 reads exactly 0x8.  Return statements elided
 * in this listing.
 */
4801 static bool gfx_v8_0_is_idle(void *handle)
4803 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4805 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4806 || RREG32(mmGRBM_STATUS2) != 0x8)
/* gfx_v8_0_rlc_is_idle - RLC idle iff GRBM_STATUS2 reads exactly 0x8 */
4812 static bool gfx_v8_0_rlc_is_idle(void *handle)
4814 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4816 if (RREG32(mmGRBM_STATUS2) != 0x8)
/*
 * gfx_v8_0_wait_for_rlc_idle - poll gfx_v8_0_rlc_is_idle() up to
 * adev->usec_timeout times.  Delay/return lines elided in this listing.
 */
4822 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4825 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4827 for (i = 0; i < adev->usec_timeout; i++) {
4828 if (gfx_v8_0_rlc_is_idle(handle))
/*
 * gfx_v8_0_wait_for_idle - poll gfx_v8_0_is_idle() up to
 * adev->usec_timeout times.  Delay/return lines elided in this listing.
 */
4836 static int gfx_v8_0_wait_for_idle(void *handle)
4839 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4841 for (i = 0; i < adev->usec_timeout; i++) {
4842 if (gfx_v8_0_is_idle(handle))
/*
 * gfx_v8_0_hw_fini - IP-block hw_fini hook: release all GFX interrupts,
 * unmap the compute queues (so CPC stops touching soon-to-be-freed
 * memory), then — for bare-metal only, SR-IOV guests return early —
 * halt the CP and RLC under RLC safe mode, waiting for idle before each
 * halt and logging instead of halting if the block stays busy.
 */
4850 static int gfx_v8_0_hw_fini(void *handle)
4852 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4854 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4855 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4857 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4859 amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4861 /* disable KCQ to avoid CPC touch memory not valid anymore */
4862 gfx_v8_0_kcq_disable(adev);
4864 if (amdgpu_sriov_vf(adev)) {
4865 pr_debug("For SRIOV client, shouldn't do anything.\n");
4868 amdgpu_gfx_rlc_enter_safe_mode(adev);
4869 if (!gfx_v8_0_wait_for_idle(adev))
4870 gfx_v8_0_cp_enable(adev, false);
4872 pr_err("cp is busy, skip halt cp\n");
4873 if (!gfx_v8_0_wait_for_rlc_idle(adev))
4874 adev->gfx.rlc.funcs->stop(adev);
4876 pr_err("rlc is busy, skip halt rlc\n");
4877 amdgpu_gfx_rlc_exit_safe_mode(adev);
/* gfx_v8_0_suspend - suspend is just hw_fini for this IP block */
4882 static int gfx_v8_0_suspend(void *handle)
4884 return gfx_v8_0_hw_fini(handle);
/* gfx_v8_0_resume - resume is just hw_init for this IP block */
4887 static int gfx_v8_0_resume(void *handle)
4889 return gfx_v8_0_hw_init(handle);
/*
 * gfx_v8_0_check_soft_reset - inspect GRBM_STATUS/GRBM_STATUS2/
 * SRBM_STATUS busy bits and accumulate the GRBM/SRBM soft-reset masks
 * needed to recover.  The masks are stashed in adev->gfx for the
 * pre/soft/post reset hooks; returns whether any reset is needed
 * (return statements elided in this listing).
 */
4892 static bool gfx_v8_0_check_soft_reset(void *handle)
4894 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4895 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
/* GRBM_STATUS: any busy engine => reset CP + GFX + GRBM */
4899 tmp = RREG32(mmGRBM_STATUS);
4900 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4901 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4902 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4903 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4904 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4905 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4906 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4907 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4908 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4909 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4910 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4911 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4912 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
/* GRBM_STATUS2: RLC busy => reset RLC; CPF/CPC/CPG busy => reset CP blocks */
4916 tmp = RREG32(mmGRBM_STATUS2);
4917 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4918 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4919 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4921 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4922 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4923 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4924 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4926 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4928 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4930 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4931 SOFT_RESET_GRBM, 1);
/* SRBM_STATUS: pending GRBM request or busy semaphore block */
4935 tmp = RREG32(mmSRBM_STATUS);
4936 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4937 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4938 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4939 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
4940 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4941 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
4943 if (grbm_soft_reset || srbm_soft_reset) {
4944 adev->gfx.grbm_soft_reset = grbm_soft_reset;
4945 adev->gfx.srbm_soft_reset = srbm_soft_reset;
4948 adev->gfx.grbm_soft_reset = 0;
4949 adev->gfx.srbm_soft_reset = 0;
/*
 * gfx_v8_0_pre_soft_reset - quiesce the GFX block before soft reset:
 * no-op if check_soft_reset recorded nothing; otherwise stop the RLC,
 * disable GFX CP parsing if CP/GFX reset is pending, and for any CP*
 * reset deactivate every compute HQD (per-queue srbm select under
 * srbm_mutex) before disabling the compute CP.
 */
4954 static int gfx_v8_0_pre_soft_reset(void *handle)
4956 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4957 u32 grbm_soft_reset = 0;
4959 if ((!adev->gfx.grbm_soft_reset) &&
4960 (!adev->gfx.srbm_soft_reset))
4963 grbm_soft_reset = adev->gfx.grbm_soft_reset;
/* stop the rlc */
4966 adev->gfx.rlc.funcs->stop(adev);
4968 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4969 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
4970 /* Disable GFX parsing/prefetching */
4971 gfx_v8_0_cp_gfx_enable(adev, false);
4973 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4974 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
4975 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
4976 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
4979 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4980 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4982 mutex_lock(&adev->srbm_mutex);
4983 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4984 gfx_v8_0_deactivate_hqd(adev, 2);
4985 vi_srbm_select(adev, 0, 0, 0, 0);
4986 mutex_unlock(&adev->srbm_mutex);
4988 /* Disable MEC parsing/prefetching */
4989 gfx_v8_0_cp_compute_enable(adev, false);
/*
 * gfx_v8_0_soft_reset - perform the actual reset using the masks
 * recorded by check_soft_reset: stall/clear GFX via GMCON_DEBUG, pulse
 * the GRBM then SRBM soft-reset bits (write set, read back, write
 * clear — intervening udelay lines are elided in this listing), then
 * release the GMCON stall and let things settle.
 */
4995 static int gfx_v8_0_soft_reset(void *handle)
4997 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4998 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5001 if ((!adev->gfx.grbm_soft_reset) &&
5002 (!adev->gfx.srbm_soft_reset))
5005 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5006 srbm_soft_reset = adev->gfx.srbm_soft_reset;
/* stall GFX memory traffic while resetting */
5008 if (grbm_soft_reset || srbm_soft_reset) {
5009 tmp = RREG32(mmGMCON_DEBUG);
5010 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5011 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5012 WREG32(mmGMCON_DEBUG, tmp);
5016 if (grbm_soft_reset) {
5017 tmp = RREG32(mmGRBM_SOFT_RESET);
5018 tmp |= grbm_soft_reset;
5019 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5020 WREG32(mmGRBM_SOFT_RESET, tmp);
5021 tmp = RREG32(mmGRBM_SOFT_RESET);
5025 tmp &= ~grbm_soft_reset;
5026 WREG32(mmGRBM_SOFT_RESET, tmp);
5027 tmp = RREG32(mmGRBM_SOFT_RESET);
5030 if (srbm_soft_reset) {
5031 tmp = RREG32(mmSRBM_SOFT_RESET);
5032 tmp |= srbm_soft_reset;
5033 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5034 WREG32(mmSRBM_SOFT_RESET, tmp);
5035 tmp = RREG32(mmSRBM_SOFT_RESET);
5039 tmp &= ~srbm_soft_reset;
5040 WREG32(mmSRBM_SOFT_RESET, tmp);
5041 tmp = RREG32(mmSRBM_SOFT_RESET);
/* un-stall GFX memory traffic */
5044 if (grbm_soft_reset || srbm_soft_reset) {
5045 tmp = RREG32(mmGMCON_DEBUG);
5046 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5047 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5048 WREG32(mmGMCON_DEBUG, tmp);
5051 /* Wait a little for things to settle down */
/*
 * gfx_v8_0_post_soft_reset - bring the block back after soft reset:
 * no-op if nothing was reset; otherwise deactivate lingering compute
 * HQDs and resume KIQ+KCQ if any CP block was reset, resume the GFX CP
 * if CP/GFX was reset, then re-run all ring tests and restart the RLC.
 */
5057 static int gfx_v8_0_post_soft_reset(void *handle)
5059 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5060 u32 grbm_soft_reset = 0;
5062 if ((!adev->gfx.grbm_soft_reset) &&
5063 (!adev->gfx.srbm_soft_reset))
5066 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5068 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5069 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5070 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5071 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5074 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5075 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5077 mutex_lock(&adev->srbm_mutex);
5078 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5079 gfx_v8_0_deactivate_hqd(adev, 2);
5080 vi_srbm_select(adev, 0, 0, 0, 0);
5081 mutex_unlock(&adev->srbm_mutex);
5083 gfx_v8_0_kiq_resume(adev);
5084 gfx_v8_0_kcq_resume(adev);
5087 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5088 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5089 gfx_v8_0_cp_gfx_resume(adev);
5091 gfx_v8_0_cp_test_all_rings(adev);
5093 adev->gfx.rlc.funcs->start(adev);
5099 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5101 * @adev: amdgpu_device pointer
5103 * Fetches a GPU clock counter snapshot.
5104 * Returns the 64 bit clock counter snapshot.
5106 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
/* latch the counter, then read LSB/MSB halves under gpu_clock_mutex */
5110 mutex_lock(&adev->gfx.gpu_clock_mutex);
5111 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5112 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5113 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5114 mutex_unlock(&adev->gfx.gpu_clock_mutex);
/*
 * gfx_v8_0_ring_emit_gds_switch - emit four WRITE_DATA packets that
 * program the per-VMID GDS memory base/size, GWS, and OA allocations.
 * The OA value is a contiguous bitmask of oa_size bits starting at bit
 * oa_base.
 */
5118 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5120 uint32_t gds_base, uint32_t gds_size,
5121 uint32_t gws_base, uint32_t gws_size,
5122 uint32_t oa_base, uint32_t oa_size)
/* GDS base */
5125 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5126 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5127 WRITE_DATA_DST_SEL(0)));
5128 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5129 amdgpu_ring_write(ring, 0);
5130 amdgpu_ring_write(ring, gds_base);
/* GDS size */
5133 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5134 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5135 WRITE_DATA_DST_SEL(0)));
5136 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5137 amdgpu_ring_write(ring, 0);
5138 amdgpu_ring_write(ring, gds_size);
/* GWS base and size packed into one register */
5141 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5142 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5143 WRITE_DATA_DST_SEL(0)));
5144 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5145 amdgpu_ring_write(ring, 0);
5146 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
/* OA allocation mask */
5149 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5150 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5151 WRITE_DATA_DST_SEL(0)));
5152 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5153 amdgpu_ring_write(ring, 0);
5154 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
/*
 * wave_read_ind - read one indirect SQ register of a wave: program
 * SQ_IND_INDEX with wave/simd/address (FORCE_READ set), then read the
 * value back through SQ_IND_DATA.
 */
5157 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5159 WREG32(mmSQ_IND_INDEX,
5160 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5161 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5162 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5163 (SQ_IND_INDEX__FORCE_READ_MASK));
5164 return RREG32(mmSQ_IND_DATA);
/*
 * wave_read_regs - bulk-read `num` consecutive indirect SQ registers
 * starting at `regno` for a given wave/simd/thread into `out`, using
 * the SQ_IND_INDEX auto-increment mode so only one index write is
 * needed.  The surrounding read loop line is elided in this listing.
 */
5167 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5168 uint32_t wave, uint32_t thread,
5169 uint32_t regno, uint32_t num, uint32_t *out)
5171 WREG32(mmSQ_IND_INDEX,
5172 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5173 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5174 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5175 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5176 (SQ_IND_INDEX__FORCE_READ_MASK) |
5177 (SQ_IND_INDEX__AUTO_INCR_MASK))
5179 *(out++) = RREG32(mmSQ_IND_DATA);
/*
 * gfx_v8_0_read_wave_data - dump a wave's status registers (PC, EXEC,
 * HW_ID, GPR/LDS alloc, trap status, TBA/TMA, M0, ...) into dst,
 * incrementing *no_fields per entry; dst[0]=0 marks "type 0 wave data".
 * Used by the debugfs wave-inspection interface via gfx_v8_0_gfx_funcs.
 */
5182 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5184 /* type 0 wave data */
5185 dst[(*no_fields)++] = 0;
5186 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5187 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5188 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5189 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5190 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5191 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5192 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5193 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5194 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5195 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5196 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5197 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5198 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5199 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5200 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5201 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5202 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5203 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
/*
 * gfx_v8_0_read_wave_sgprs - read `size` SGPRs of a wave starting at
 * `start` into dst (thread id 0, SGPR file window at
 * SQIND_WAVE_SGPRS_OFFSET).  The wave_read_regs( call line is elided
 * in this listing.
 */
5206 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5207 uint32_t wave, uint32_t start,
5208 uint32_t size, uint32_t *dst)
5211 adev, simd, wave, 0,
5212 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
/* GFX8 implementation of the common amdgpu_gfx_funcs vtable */
5216 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5217 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5218 .select_se_sh = &gfx_v8_0_select_se_sh,
5219 .read_wave_data = &gfx_v8_0_read_wave_data,
5220 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5221 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
/*
 * gfx_v8_0_early_init - IP-block early_init hook: set ring counts and
 * install the gfx/ring/irq/gds/rlc function tables before any hardware
 * access.
 */
5224 static int gfx_v8_0_early_init(void *handle)
5226 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5228 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5229 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5230 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5231 gfx_v8_0_set_ring_funcs(adev);
5232 gfx_v8_0_set_irq_funcs(adev);
5233 gfx_v8_0_set_gds_init(adev);
5234 gfx_v8_0_set_rlc_funcs(adev);
/*
 * gfx_v8_0_late_init - IP-block late_init hook: enable the privileged
 * register/instruction, CP ECC error and SQ interrupts, and run the
 * EDC GPR workarounds (which need the IB pool, hence late init).
 * Error early-returns between steps are elided in this listing.
 */
5239 static int gfx_v8_0_late_init(void *handle)
5241 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5244 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5248 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5252 /* requires IBs so do in late init after IB pool is initialized */
5253 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5257 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5259 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5263 r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5266 "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
/*
 * gfx_v8_0_enable_gfx_static_mg_power_gating - toggle static per-CU
 * power gating.  Polaris11/12/VegaM route the request through the SMU
 * via powerplay; other ASICs (elided "else" in this listing) write
 * RLC_PG_CNTL directly.
 */
5274 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5277 if (((adev->asic_type == CHIP_POLARIS11) ||
5278 (adev->asic_type == CHIP_POLARIS12) ||
5279 (adev->asic_type == CHIP_VEGAM)) &&
5280 adev->powerplay.pp_funcs->set_powergating_by_smu)
5281 /* Send msg to SMU via Powerplay */
5282 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5284 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
/* Toggle dynamic per-CU power gating via RLC_PG_CNTL */
5287 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5290 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
/* Toggle quick medium-grain power gating (Polaris11 family) via RLC_PG_CNTL */
5293 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5296 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
/* Toggle coarse-grain GFX power gating (Carrizo path) via RLC_PG_CNTL */
5299 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5302 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
/*
 * Toggle GFX pipeline power gating (Carrizo path); the trailing dummy
 * register read wakes the GFX block back up after the write.
 */
5305 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5308 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5310 /* Read any GFX register to wake up GFX. */
5312 RREG32(mmDB_RENDER_CONTROL);
/*
 * cz_update_gfx_cg_power_gating - enable CG power gating (plus pipeline
 * PG when supported) if GFX_PG is supported and requested; otherwise
 * disable both.
 */
5315 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5318 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5319 cz_enable_gfx_cg_power_gating(adev, true);
5320 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5321 cz_enable_gfx_pipeline_power_gating(adev, true);
/* else branch (elided): turn both off */
5323 cz_enable_gfx_cg_power_gating(adev, false);
5324 cz_enable_gfx_pipeline_power_gating(adev, false);
/*
 * gfx_v8_0_set_powergating_state - IP-block powergating hook.  No-op
 * under SR-IOV.  When any of SMG/RLC_SMU_HS/DMG (and the elided flag on
 * the truncated line) is supported, the per-ASIC PG programming is
 * bracketed by RLC safe mode.  The ASIC switch covers the Carrizo-style
 * path (SCK slow-down, CP PG, CG PG, SMG/DMG) and the Polaris11/12
 * path (SMG/DMG/quick-MG); case labels for the first branch are elided
 * in this listing.
 */
5328 static int gfx_v8_0_set_powergating_state(void *handle,
5329 enum amd_powergating_state state)
5331 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5332 bool enable = (state == AMD_PG_STATE_GATE);
5334 if (amdgpu_sriov_vf(adev))
5337 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5338 AMD_PG_SUPPORT_RLC_SMU_HS |
5340 AMD_PG_SUPPORT_GFX_DMG))
5341 amdgpu_gfx_rlc_enter_safe_mode(adev);
5342 switch (adev->asic_type) {
5346 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5347 cz_enable_sck_slow_down_on_power_up(adev, true);
5348 cz_enable_sck_slow_down_on_power_down(adev, true);
5350 cz_enable_sck_slow_down_on_power_up(adev, false);
5351 cz_enable_sck_slow_down_on_power_down(adev, false);
5353 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5354 cz_enable_cp_power_gating(adev, true);
5356 cz_enable_cp_power_gating(adev, false);
5358 cz_update_gfx_cg_power_gating(adev, enable);
5360 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5361 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5363 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5365 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5366 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5368 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5370 case CHIP_POLARIS11:
5371 case CHIP_POLARIS12:
5373 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5374 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5376 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5378 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5379 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5381 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5383 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5384 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5386 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5391 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5392 AMD_PG_SUPPORT_RLC_SMU_HS |
5394 AMD_PG_SUPPORT_GFX_DMG))
5395 amdgpu_gfx_rlc_exit_safe_mode(adev);
/*
 * gfx_v8_0_get_clockgating_state - report the currently-active GFX
 * clock-gating features by decoding live register state into
 * AMD_CG_SUPPORT_* bits in *flags.  No-op under SR-IOV.
 */
5399 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5401 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5404 if (amdgpu_sriov_vf(adev))
5407 /* AMD_CG_SUPPORT_GFX_MGCG */
5408 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5409 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5410 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5412 /* AMD_CG_SUPPORT_GFX_CGLG */
5413 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5414 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5415 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5417 /* AMD_CG_SUPPORT_GFX_CGLS */
5418 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5419 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5421 /* AMD_CG_SUPPORT_GFX_CGTS */
5422 data = RREG32(mmCGTS_SM_CTRL_REG);
5423 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5424 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5426 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5427 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5428 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5430 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5431 data = RREG32(mmRLC_MEM_SLP_CNTL);
5432 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5433 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5435 /* AMD_CG_SUPPORT_GFX_CP_LS */
5436 data = RREG32(mmCP_MEM_SLP_CNTL);
5437 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5438 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
/*
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM serdes command to all
 * SE/SH/CU instances: select the broadcast SE/SH, set the CU/non-CU
 * master masks to all-ones, clear the command fields of
 * RLC_SERDES_WR_CTRL (Stoney keeps BPM_DATA/REG_ADDR untouched), fill
 * in the new command/address, and issue the write.
 */
5441 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5442 uint32_t reg_addr, uint32_t cmd)
5446 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5448 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5449 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5451 data = RREG32(mmRLC_SERDES_WR_CTRL);
5452 if (adev->asic_type == CHIP_STONEY)
5453 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5454 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5455 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5456 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5457 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5458 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5459 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5460 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5461 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5463 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5464 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5465 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5466 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5467 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5468 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5469 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5470 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5471 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5472 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5473 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5474 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5475 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5476 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5477 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5479 WREG32(mmRLC_SERDES_WR_CTRL, data);
5482 #define MSG_ENTER_RLC_SAFE_MODE 1
5483 #define MSG_EXIT_RLC_SAFE_MODE 0
5484 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5485 #define RLC_GPR_REG2__REQ__SHIFT 0
5486 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5487 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
/*
 * gfx_v8_0_is_rlc_enabled - check whether the RLC F32 core is running.
 * Reads RLC_CNTL; the RLC is considered disabled when RLC_ENABLE_F32 is
 * clear. NOTE(review): the return statements are elided in this extract.
 */
5489 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5491 uint32_t rlc_setting;
5493 rlc_setting = RREG32(mmRLC_CNTL);
5494 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
/*
 * gfx_v8_0_set_safe_mode - request RLC safe mode (enter).
 * Writes CMD=1 with MESSAGE=1 (enter) into RLC_SAFE_MODE, then polls
 * (up to adev->usec_timeout iterations) for GFX clock+power status in
 * RLC_GPM_STAT, and finally for the CMD bit to self-clear as the ack.
 */
5500 static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5504 data = RREG32(mmRLC_CNTL);
5505 data |= RLC_SAFE_MODE__CMD_MASK;
5506 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5507 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5508 WREG32(mmRLC_SAFE_MODE, data);
5510 /* wait for RLC_SAFE_MODE */
5511 for (i = 0; i < adev->usec_timeout; i++) {
5512 if ((RREG32(mmRLC_GPM_STAT) &
5513 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5514 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5515 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5516 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
/* second poll: hardware clears CMD when the request is accepted */
5520 for (i = 0; i < adev->usec_timeout; i++) {
5521 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
/*
 * gfx_v8_0_unset_safe_mode - leave RLC safe mode.
 * Same handshake as set_safe_mode but with MESSAGE=0 (exit); only waits
 * for the CMD-bit acknowledgement.
 */
5527 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5532 data = RREG32(mmRLC_CNTL);
5533 data |= RLC_SAFE_MODE__CMD_MASK;
5534 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5535 WREG32(mmRLC_SAFE_MODE, data);
5537 for (i = 0; i < adev->usec_timeout; i++) {
5538 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
/*
 * gfx_v8_0_update_spm_vmid - program the VMID used for SPM streaming.
 * Read-modify-write of the RLC_SPM_VMID field; @vmid is masked to the
 * field width before being shifted into place.
 */
5544 static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5548 data = RREG32(mmRLC_SPM_VMID);
5550 data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5551 data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5553 WREG32(mmRLC_SPM_VMID, data);
/*
 * RLC callback table for VI-family ASICs: wires the gfx_v8_0_* RLC
 * helpers above into the common amdgpu RLC framework (safe-mode entry/
 * exit, firmware init/resume/stop/reset/start, CSB handling, SPM VMID).
 */
5556 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5557 .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5558 .set_safe_mode = gfx_v8_0_set_safe_mode,
5559 .unset_safe_mode = gfx_v8_0_unset_safe_mode,
5560 .init = gfx_v8_0_rlc_init,
5561 .get_csb_size = gfx_v8_0_get_csb_size,
5562 .get_csb_buffer = gfx_v8_0_get_csb_buffer,
5563 .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5564 .resume = gfx_v8_0_rlc_resume,
5565 .stop = gfx_v8_0_rlc_stop,
5566 .reset = gfx_v8_0_rlc_reset,
5567 .start = gfx_v8_0_rlc_start,
5568 .update_spm_vmid = gfx_v8_0_update_spm_vmid
/*
 * gfx_v8_0_update_medium_grain_clock_gating - enable/disable MGCG/MGLS/CGTS.
 * @adev:   amdgpu device
 * @enable: true to enable the gating features allowed by adev->cg_flags,
 *          false to force them all off.
 *
 * The whole sequence is bracketed by RLC safe-mode entry/exit. The numbered
 * step comments are from the original hardware programming sequence; the
 * enable path clears override bits and sends a serdes "clear override"
 * command, the disable path does the reverse.
 * NOTE(review): the else separating the enable/disable paths (and several
 * closing braces) are elided in this extract.
 */
5571 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5574 uint32_t temp, data;
5576 amdgpu_gfx_rlc_enter_safe_mode(adev);
5578 /* It is disabled by HW by default */
5579 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5580 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5581 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5582 /* 1 - RLC memory Light sleep */
5583 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5585 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5586 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5589 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5590 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
/* APUs keep the GRBM override bit; dGPUs clear it too */
5591 if (adev->flags & AMD_IS_APU)
5592 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5593 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5594 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5596 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5597 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5598 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5599 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5602 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5604 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5605 gfx_v8_0_wait_for_rlc_serdes(adev);
5607 /* 5 - clear mgcg override */
5608 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5610 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5611 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5612 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5613 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5614 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5615 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5616 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5617 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5618 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5619 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5620 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5621 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5623 WREG32(mmCGTS_SM_CTRL_REG, data);
5627 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5628 gfx_v8_0_wait_for_rlc_serdes(adev);
/* ---- disable path: set every override, turn MGLS off in RLC and CP ---- */
5630 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5631 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5632 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5633 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5634 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5635 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5637 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5639 /* 2 - disable MGLS in RLC */
5640 data = RREG32(mmRLC_MEM_SLP_CNTL);
5641 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5642 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5643 WREG32(mmRLC_MEM_SLP_CNTL, data);
5646 /* 3 - disable MGLS in CP */
5647 data = RREG32(mmCP_MEM_SLP_CNTL);
5648 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5649 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5650 WREG32(mmCP_MEM_SLP_CNTL, data);
5653 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5654 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5655 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5656 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5658 WREG32(mmCGTS_SM_CTRL_REG, data);
5660 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5661 gfx_v8_0_wait_for_rlc_serdes(adev);
5663 /* 6 - set mgcg override */
5664 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5668 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5669 gfx_v8_0_wait_for_rlc_serdes(adev);
5672 amdgpu_gfx_rlc_exit_safe_mode(adev);
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - enable/disable CGCG/CGLS.
 * @adev:   amdgpu device
 * @enable: true to enable coarse-grain clock gating (per adev->cg_flags),
 *          false to disable it.
 *
 * Bracketed by RLC safe mode. Enable path: clear the CGCG (and optionally
 * CGLS) override bits, send serdes commands, then set the enable bits in
 * RLC_CGCG_CGLS_CTRL and re-enable GUI idle interrupts. Disable path:
 * disable the idle interrupt, set the overrides, wake cgcg with dummy
 * register reads, send serdes commands, and clear the enable bits.
 * NOTE(review): else branches / closing braces are elided in this extract.
 */
5675 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5678 uint32_t temp, temp1, data, data1;
5680 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5682 amdgpu_gfx_rlc_enter_safe_mode(adev);
5684 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5685 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5686 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5688 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5690 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5691 gfx_v8_0_wait_for_rlc_serdes(adev);
5693 /* 2 - clear cgcg override */
5694 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5696 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5697 gfx_v8_0_wait_for_rlc_serdes(adev);
5699 /* 3 - write cmd to set CGLS */
5700 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5702 /* 4 - enable cgcg */
5703 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
/* CGLS is only enabled when the corresponding cg_flag is set;
 * otherwise its enable bit is cleared (else path elided here). */
5705 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5707 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5709 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5710 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5713 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5715 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5719 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5721 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5722 * Cmp_busy/GFX_Idle interrupts
5724 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/* ---- disable path ---- */
5726 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5727 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5730 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5731 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5732 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5734 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5736 /* read gfx register to wake up cgcg */
/* repeated reads are intentional — they generate bus activity */
5737 RREG32(mmCB_CGTT_SCLK_CTRL);
5738 RREG32(mmCB_CGTT_SCLK_CTRL);
5739 RREG32(mmCB_CGTT_SCLK_CTRL);
5740 RREG32(mmCB_CGTT_SCLK_CTRL);
5742 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5743 gfx_v8_0_wait_for_rlc_serdes(adev);
5745 /* write cmd to Set CGCG Overrride */
5746 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5748 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5749 gfx_v8_0_wait_for_rlc_serdes(adev);
5751 /* write cmd to Clear CGLS */
5752 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5754 /* disable cgcg, cgls should be disabled too. */
5755 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5756 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5758 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5759 /* enable interrupts again for PG */
5760 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5763 gfx_v8_0_wait_for_rlc_serdes(adev);
5765 amdgpu_gfx_rlc_exit_safe_mode(adev);
/*
 * gfx_v8_0_update_gfx_clock_gating - order-sensitive CG toggle dispatcher.
 * On enable: medium-grain first, then coarse-grain; on disable: coarse-
 * grain first, then medium-grain (CGCG/CGLS must be off before MGCG/MGLS).
 * NOTE(review): the if/else selecting between the two orderings is elided
 * in this extract.
 */
5767 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5771 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5772 * === MGCG + MGLS + TS(CG/LS) ===
5774 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5775 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5777 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5778 * === CGCG + CGLS ===
5780 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5781 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
/*
 * gfx_v8_0_tonga_update_gfx_clock_gating - CG control via SMU for Tonga.
 * Builds PP_CG_MSG_ID messages from adev->cg_flags (CGCG/CGLS group, then
 * MGCG/MGLS group) and forwards each to the SMU through
 * amdgpu_dpm_set_clockgating_by_smu(). On AMD_CG_STATE_UNGATE the pp_state
 * is zeroed before the message is sent (that assignment is elided in this
 * extract, as are the PP_CG_MSG_ID argument lines).
 */
5786 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5787 enum amd_clockgating_state state)
5789 uint32_t msg_id, pp_state = 0;
5790 uint32_t pp_support_state = 0;
5792 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5793 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5794 pp_support_state = PP_STATE_SUPPORT_LS;
5795 pp_state = PP_STATE_LS;
5797 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5798 pp_support_state |= PP_STATE_SUPPORT_CG;
5799 pp_state |= PP_STATE_CG;
5801 if (state == AMD_CG_STATE_UNGATE)
5804 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5808 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5809 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5812 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5813 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5814 pp_support_state = PP_STATE_SUPPORT_LS;
5815 pp_state = PP_STATE_LS;
5818 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5819 pp_support_state |= PP_STATE_SUPPORT_CG;
5820 pp_state |= PP_STATE_CG;
5823 if (state == AMD_CG_STATE_UNGATE)
5826 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5830 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5831 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
/*
 * gfx_v8_0_polaris_update_gfx_clock_gating - CG control via SMU for
 * Polaris ASICs. Like the Tonga variant but covers five feature groups:
 * CGCG/CGLS, 3D CGCG/CGLS, MGCG/MGLS, RLC LS and CP LS — each translated
 * into a PP_CG_MSG_ID message for the SMU. As elsewhere in this extract,
 * the UNGATE pp_state resets and PP_CG_MSG_ID argument lines are elided.
 */
5837 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5838 enum amd_clockgating_state state)
5841 uint32_t msg_id, pp_state = 0;
5842 uint32_t pp_support_state = 0;
/* group 1: coarse-grain CGCG/CGLS */
5844 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5845 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5846 pp_support_state = PP_STATE_SUPPORT_LS;
5847 pp_state = PP_STATE_LS;
5849 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5850 pp_support_state |= PP_STATE_SUPPORT_CG;
5851 pp_state |= PP_STATE_CG;
5853 if (state == AMD_CG_STATE_UNGATE)
5856 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5860 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5861 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
/* group 2: 3D-pipe CGCG/CGLS */
5864 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5865 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5866 pp_support_state = PP_STATE_SUPPORT_LS;
5867 pp_state = PP_STATE_LS;
5869 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5870 pp_support_state |= PP_STATE_SUPPORT_CG;
5871 pp_state |= PP_STATE_CG;
5873 if (state == AMD_CG_STATE_UNGATE)
5876 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5880 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5881 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
/* group 3: medium-grain MGCG/MGLS */
5884 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5885 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5886 pp_support_state = PP_STATE_SUPPORT_LS;
5887 pp_state = PP_STATE_LS;
5890 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5891 pp_support_state |= PP_STATE_SUPPORT_CG;
5892 pp_state |= PP_STATE_CG;
5895 if (state == AMD_CG_STATE_UNGATE)
5898 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5902 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5903 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
/* group 4: RLC memory light sleep (LS-only group) */
5906 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5907 pp_support_state = PP_STATE_SUPPORT_LS;
5909 if (state == AMD_CG_STATE_UNGATE)
5912 pp_state = PP_STATE_LS;
5914 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5918 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5919 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
/* group 5: CP memory light sleep (LS-only group) */
5922 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5923 pp_support_state = PP_STATE_SUPPORT_LS;
5925 if (state == AMD_CG_STATE_UNGATE)
5928 pp_state = PP_STATE_LS;
5929 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5933 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5934 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
/*
 * gfx_v8_0_set_clockgating_state - IP-block clockgating entry point.
 * Dispatches per ASIC: generic register path for the first case group,
 * SMU-message path for Tonga and the Polaris family. No-op under SR-IOV.
 * NOTE(review): the case labels for the first group and the break/return
 * lines are elided in this extract.
 */
5940 static int gfx_v8_0_set_clockgating_state(void *handle,
5941 enum amd_clockgating_state state)
5943 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5945 if (amdgpu_sriov_vf(adev))
5948 switch (adev->asic_type) {
5952 gfx_v8_0_update_gfx_clock_gating(adev,
5953 state == AMD_CG_STATE_GATE);
5956 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5958 case CHIP_POLARIS10:
5959 case CHIP_POLARIS11:
5960 case CHIP_POLARIS12:
5962 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
/* Read the GFX ring read pointer from its writeback slot. */
5970 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
5972 return ring->adev->wb.wb[ring->rptr_offs];
/* Read the GFX ring write pointer: from the writeback slot when the ring
 * uses a doorbell, otherwise from the CP_RB0_WPTR register. */
5975 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5977 struct amdgpu_device *adev = ring->adev;
5979 if (ring->use_doorbell)
5980 /* XXX check if swapping is necessary on BE */
5981 return ring->adev->wb.wb[ring->wptr_offs];
5983 return RREG32(mmCP_RB0_WPTR);
/* Publish the GFX ring write pointer: via writeback + doorbell when
 * doorbells are in use, otherwise by writing CP_RB0_WPTR (the read-back
 * flushes the write). */
5986 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5988 struct amdgpu_device *adev = ring->adev;
5990 if (ring->use_doorbell) {
5991 /* XXX check if swapping is necessary on BE */
5992 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
5993 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
5995 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5996 (void)RREG32(mmCP_RB0_WPTR);
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit a WAIT_REG_MEM packet that writes
 * GPU_HDP_FLUSH_REQ and polls GPU_HDP_FLUSH_DONE until the per-engine
 * ref bit matches. Compute/KIQ rings use the CP2/CP6 bits shifted by
 * pipe; the GFX ring uses CP0 on the PFP engine.
 * NOTE(review): the pipe-selection branches around lines 6008/6011 are
 * partially elided in this extract.
 */
6000 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6002 u32 ref_and_mask, reg_mem_engine;
6004 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6005 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6008 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6011 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6018 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6019 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6022 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6023 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6024 WAIT_REG_MEM_FUNCTION(3) | /* == */
6026 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6027 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6028 amdgpu_ring_write(ring, ref_and_mask);
6029 amdgpu_ring_write(ring, ref_and_mask);
6030 amdgpu_ring_write(ring, 0x20); /* poll interval */
/* Emit back-to-back EVENT_WRITE packets: VS_PARTIAL_FLUSH then VGT_FLUSH,
 * draining the VGT before state that affects it is changed. */
6033 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6035 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6036 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6039 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6040 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
/*
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on the GFX ring.
 * CE IBs use INDIRECT_BUFFER_CONST, DE IBs use INDIRECT_BUFFER. The VMID
 * is packed into bits 24+ of the control word. Under SR-IOV, preemptible
 * IBs get INDIRECT_BUFFER_PRE_ENB and DE-meta data is emitted for
 * non-CE IBs with a non-zero vmid.
 */
6044 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6045 struct amdgpu_job *job,
6046 struct amdgpu_ib *ib,
6049 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6050 u32 header, control = 0;
6052 if (ib->flags & AMDGPU_IB_FLAG_CE)
6053 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6055 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6057 control |= ib->length_dw | (vmid << 24);
6059 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6060 control |= INDIRECT_BUFFER_PRE_ENB(1);
6062 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6063 gfx_v8_0_ring_emit_de_meta(ring);
/* IB base address must be 4-byte aligned (low 2 bits masked off) */
6066 amdgpu_ring_write(ring, header);
6067 amdgpu_ring_write(ring,
6071 (ib->gpu_addr & 0xFFFFFFFC));
6072 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6073 amdgpu_ring_write(ring, control);
/*
 * gfx_v8_0_ring_emit_ib_compute - emit an indirect buffer on a compute
 * ring. Optionally resets GDS_COMPUTE_MAX_WAVE_ID first (see comment
 * below) to work around ME/GDS wave-ID mismatch deadlocks.
 */
6076 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6077 struct amdgpu_job *job,
6078 struct amdgpu_ib *ib,
6081 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6082 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6084 /* Currently, there is a high possibility to get wave ID mismatch
6085 * between ME and GDS, leading to a hw deadlock, because ME generates
6086 * different wave IDs than the GDS expects. This situation happens
6087 * randomly when at least 5 compute pipes use GDS ordered append.
6088 * The wave IDs generated by ME are also wrong after suspend/resume.
6089 * Those are probably bugs somewhere else in the kernel driver.
6091 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6092 * GDS to 0 for this ring (me/pipe).
6094 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6095 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6096 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6097 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6100 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6101 amdgpu_ring_write(ring,
6105 (ib->gpu_addr & 0xFFFFFFFC));
6106 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6107 amdgpu_ring_write(ring, control);
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the GFX ring.
 * Emits two EVENT_WRITE_EOP packets: a dummy one with (seq - 1) to work
 * around cache-flush issues, then the real one carrying @seq, with the
 * data/interrupt selects derived from @flags (64-bit value, IRQ).
 */
6110 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6111 u64 seq, unsigned flags)
6113 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6114 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6116 /* Workaround for cache flush problems. First send a dummy EOP
6117 * event down the pipe with seq one below.
6119 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6120 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6122 EOP_TC_WB_ACTION_EN |
6123 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
/* dummy fence: 32-bit data write, no interrupt */
6125 amdgpu_ring_write(ring, addr & 0xfffffffc);
6126 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6127 DATA_SEL(1) | INT_SEL(0));
6128 amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6129 amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6131 /* Then send the real EOP event down the pipe:
6132 * EVENT_WRITE_EOP - flush caches, send int */
6133 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6134 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6136 EOP_TC_WB_ACTION_EN |
6137 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6139 amdgpu_ring_write(ring, addr & 0xfffffffc);
6140 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6141 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6142 amdgpu_ring_write(ring, lower_32_bits(seq));
6143 amdgpu_ring_write(ring, upper_32_bits(seq));
/*
 * gfx_v8_0_ring_emit_pipeline_sync - wait for this ring's own sync fence.
 * WAIT_REG_MEM on the fence writeback address until it equals the latest
 * sync_seq; GFX rings wait on the PFP engine, compute rings on ME.
 */
6147 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6149 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6150 uint32_t seq = ring->fence_drv.sync_seq;
6151 uint64_t addr = ring->fence_drv.gpu_addr;
6153 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6154 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6155 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6156 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6157 amdgpu_ring_write(ring, addr & 0xfffffffc);
6158 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6159 amdgpu_ring_write(ring, seq);
6160 amdgpu_ring_write(ring, 0xffffffff);
6161 amdgpu_ring_write(ring, 4); /* poll interval */
/*
 * gfx_v8_0_ring_emit_vm_flush - flush the GPU TLB for @vmid and wait for
 * completion by polling VM_INVALIDATE_REQUEST back to 0. GFX rings then
 * emit PFP_SYNC_ME so the PFP does not run ahead with stale translations
 * (the "if (usepfp)" guard around that part is elided in this extract).
 */
6164 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6165 unsigned vmid, uint64_t pd_addr)
6167 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6169 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6171 /* wait for the invalidate to complete */
6172 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6173 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6174 WAIT_REG_MEM_FUNCTION(0) | /* always */
6175 WAIT_REG_MEM_ENGINE(0))); /* me */
6176 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6177 amdgpu_ring_write(ring, 0);
6178 amdgpu_ring_write(ring, 0); /* ref */
6179 amdgpu_ring_write(ring, 0); /* mask */
6180 amdgpu_ring_write(ring, 0x20); /* poll interval */
6182 /* compute doesn't have PFP */
6184 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6185 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6186 amdgpu_ring_write(ring, 0x0);
/* Compute rings always use the writeback slot for the write pointer. */
6190 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6192 return ring->adev->wb.wb[ring->wptr_offs];
/* Publish a compute ring's wptr: writeback slot + doorbell. */
6195 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6197 struct amdgpu_device *adev = ring->adev;
6199 /* XXX check if swapping is necessary on BE */
6200 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6201 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
/*
 * gfx_v8_0_ring_emit_fence_compute - fence on a compute ring via a single
 * RELEASE_MEM packet (flush caches, optionally raise an interrupt).
 */
6204 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6208 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6209 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6211 /* RELEASE_MEM - flush caches, send int */
6212 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6213 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6215 EOP_TC_WB_ACTION_EN |
6216 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6218 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6219 amdgpu_ring_write(ring, addr & 0xfffffffc);
6220 amdgpu_ring_write(ring, upper_32_bits(addr));
6221 amdgpu_ring_write(ring, lower_32_bits(seq));
6222 amdgpu_ring_write(ring, upper_32_bits(seq));
/*
 * gfx_v8_0_ring_emit_fence_kiq - fence on the KIQ ring using plain
 * WRITE_DATA packets (only 32-bit sequence slots are allocated, hence
 * the BUG_ON). When an interrupt is requested, a second WRITE_DATA pokes
 * CPC_INT_STATUS to trigger src_id 178.
 */
6225 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6226 u64 seq, unsigned int flags)
6228 /* we only allocate 32bit for each seq wb address */
6229 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6231 /* write fence seq to the "addr" */
6232 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6233 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6234 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6235 amdgpu_ring_write(ring, lower_32_bits(addr));
6236 amdgpu_ring_write(ring, upper_32_bits(addr));
6237 amdgpu_ring_write(ring, lower_32_bits(seq));
6239 if (flags & AMDGPU_FENCE_FLAG_INT) {
6240 /* set register to trigger INT */
6241 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6242 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6243 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6244 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6245 amdgpu_ring_write(ring, 0);
6246 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
/* Emit a SWITCH_BUFFER packet (double-buffered CE/DE handshake). */
6250 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6252 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6253 amdgpu_ring_write(ring, 0);
/*
 * gfx_v8_ring_emit_cntxcntl - emit CONTEXT_CONTROL. Builds the dw2 load
 * mask from @flags: context switches get a VGT flush plus the state-load
 * bits, preamble IBs request CE RAM load. Under SR-IOV, CE-meta data is
 * emitted first. (Several dw2 |= lines are elided in this extract.)
 */
6256 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6260 if (amdgpu_sriov_vf(ring->adev))
6261 gfx_v8_0_ring_emit_ce_meta(ring);
6263 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6264 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6265 gfx_v8_0_ring_emit_vgt_flush(ring);
6266 /* set load_global_config & load_global_uconfig */
6268 /* set load_cs_sh_regs */
6270 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6273 /* set load_ce_ram if preamble presented */
6274 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6277 /* still load_ce_ram if this is the first time preamble presented
6278 * although there is no context switch happens.
6280 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6284 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6285 amdgpu_ring_write(ring, dw2);
6286 amdgpu_ring_write(ring, 0);
/*
 * gfx_v8_0_ring_emit_init_cond_exec - emit a COND_EXEC packet whose
 * DW-count slot is patched later; returns the ring offset of the
 * placeholder (0x55aa55aa) so patch_cond_exec can find it.
 */
6289 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6293 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6294 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6295 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6296 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6297 ret = ring->wptr & ring->buf_mask;
6298 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
/*
 * gfx_v8_0_ring_emit_patch_cond_exec - back-patch the COND_EXEC DW count.
 * @offset: ring offset of the 0x55aa55aa placeholder written by
 *          init_cond_exec. Computes the distance from the placeholder to
 *          the current wptr, handling ring-buffer wraparound.
 */
6302 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6306 BUG_ON(offset > ring->buf_mask);
6307 BUG_ON(ring->ring[offset] != 0x55aa55aa);
6309 cur = (ring->wptr & ring->buf_mask) - 1;
6310 if (likely(cur > offset))
6311 ring->ring[offset] = cur - offset;
/* wrapped: add the ring size (in DWs) back in */
6313 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
/*
 * gfx_v8_0_ring_emit_rreg - read a register into the KIQ writeback slot
 * via a COPY_DATA packet (src = register, dst = memory, write-confirm).
 */
6316 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6318 struct amdgpu_device *adev = ring->adev;
6319 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
6321 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6322 amdgpu_ring_write(ring, 0 | /* src: register*/
6323 (5 << 8) | /* dst: memory */
6324 (1 << 20)); /* write confirm */
6325 amdgpu_ring_write(ring, reg);
6326 amdgpu_ring_write(ring, 0);
6327 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6328 kiq->reg_val_offs * 4));
6329 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6330 kiq->reg_val_offs * 4));
/*
 * gfx_v8_0_ring_emit_wreg - write @val to register @reg via WRITE_DATA.
 * The engine-select bits differ per ring type (GFX uses the PFP with
 * write-confirm; KIQ sets the no-increment bit).
 */
6333 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6338 switch (ring->funcs->type) {
6339 case AMDGPU_RING_TYPE_GFX:
6340 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6342 case AMDGPU_RING_TYPE_KIQ:
6343 cmd = 1 << 16; /* no inc addr */
6350 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6351 amdgpu_ring_write(ring, cmd);
6352 amdgpu_ring_write(ring, reg);
6353 amdgpu_ring_write(ring, 0);
6354 amdgpu_ring_write(ring, val);
/*
 * gfx_v8_0_ring_soft_recovery - try to kill the hung wavefronts of @vmid
 * by writing a kill command (CMD=0x03, broadcast MODE, CHECK_VMID) to
 * SQ_CMD.
 */
6357 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6359 struct amdgpu_device *adev = ring->adev;
6362 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6363 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6364 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6365 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6366 WREG32(mmSQ_CMD, value);
/* Toggle the GFX ring's end-of-pipe (timestamp) interrupt. */
6369 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6370 enum amdgpu_interrupt_state state)
6372 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6373 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/*
 * gfx_v8_0_set_compute_eop_interrupt_state - toggle EOP interrupts for a
 * single MEC1 pipe. The me/pipe validation branches (and the register
 * selection for me != 1) are elided in this extract; invalid values are
 * reported with DRM_DEBUG and ignored.
 */
6376 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6378 enum amdgpu_interrupt_state state)
6380 u32 mec_int_cntl, mec_int_cntl_reg;
6383 * amdgpu controls only the first MEC. That's why this function only
6384 * handles the setting of interrupts for this specific MEC. All other
6385 * pipes' interrupts are set by amdkfd.
6391 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6394 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6397 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6400 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6403 DRM_DEBUG("invalid pipe %d\n", pipe);
6407 DRM_DEBUG("invalid me %d\n", me);
/* read-modify-write the selected pipe's TIME_STAMP_INT_ENABLE bit */
6412 case AMDGPU_IRQ_STATE_DISABLE:
6413 mec_int_cntl = RREG32(mec_int_cntl_reg);
6414 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6415 WREG32(mec_int_cntl_reg, mec_int_cntl);
6417 case AMDGPU_IRQ_STATE_ENABLE:
6418 mec_int_cntl = RREG32(mec_int_cntl_reg);
6419 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6420 WREG32(mec_int_cntl_reg, mec_int_cntl);
/* Enable/disable the privileged-register-violation interrupt. */
6427 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6428 struct amdgpu_irq_src *source,
6430 enum amdgpu_interrupt_state state)
6432 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6433 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/* Enable/disable the privileged-instruction interrupt. */
6438 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6439 struct amdgpu_irq_src *source,
6441 enum amdgpu_interrupt_state state)
6443 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6444 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/*
 * gfx_v8_0_set_eop_interrupt_state - route an EOP interrupt request to
 * the GFX ring handler or the matching MEC (me, pipe) handler.
 */
6449 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6450 struct amdgpu_irq_src *src,
6452 enum amdgpu_interrupt_state state)
6455 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6456 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6458 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6459 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6461 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6462 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6464 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6465 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6467 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6468 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6470 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6471 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6473 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6474 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6476 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6477 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6479 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6480 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
/*
 * gfx_v8_0_set_cp_ecc_int_state - toggle the CP ECC-error interrupt on
 * every CP/CPC ring and every ME1/ME2 pipe in one pass. The enable_flag
 * assignments inside the switch are elided in this extract.
 */
6488 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6489 struct amdgpu_irq_src *source,
6491 enum amdgpu_interrupt_state state)
6496 case AMDGPU_IRQ_STATE_DISABLE:
6500 case AMDGPU_IRQ_STATE_ENABLE:
6508 WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6509 WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6510 WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6511 WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6512 WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6513 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6515 WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6517 WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6519 WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6521 WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6523 WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6525 WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6527 WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
/* Toggle the SQ interrupt-message stall bit (the switch's flag
 * assignments are elided in this extract). */
6533 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6534 struct amdgpu_irq_src *source,
6536 enum amdgpu_interrupt_state state)
6541 case AMDGPU_IRQ_STATE_DISABLE:
6545 case AMDGPU_IRQ_STATE_ENABLE:
6553 WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
/*
 * gfx_v8_0_eop_irq - CP end-of-pipe (fence) interrupt handler.
 *
 * Decodes the IV entry's ring_id into ME/pipe/queue and runs fence
 * processing on the ring that signalled: the single GFX ring for the
 * graphics ME, otherwise the matching compute ring.
 */
6559 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6560 struct amdgpu_irq_src *source,
6561 struct amdgpu_iv_entry *entry)
6564 u8 me_id, pipe_id, queue_id;
6565 struct amdgpu_ring *ring;
6567 DRM_DEBUG("IH: CP EOP\n");
/* ring_id layout: bits [3:2] = me, [1:0] = pipe, [6:4] = queue */
6568 me_id = (entry->ring_id & 0x0c) >> 2;
6569 pipe_id = (entry->ring_id & 0x03) >> 0;
6570 queue_id = (entry->ring_id & 0x70) >> 4;
6574 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
/* compute: find the ring that matches the decoded me/pipe/queue */
6578 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6579 ring = &adev->gfx.compute_ring[i];
6580 /* Per-queue interrupt is supported for MEC starting from VI.
6581 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6583 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6584 amdgpu_fence_process(ring);
/*
 * gfx_v8_0_fault - common handler for CP fault interrupts (illegal register
 * access / illegal instruction).
 *
 * Decodes the IV entry's ring_id and reports a scheduler fault on the
 * affected ring so the DRM GPU scheduler can begin recovery.
 */
6591 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6592 struct amdgpu_iv_entry *entry)
6594 u8 me_id, pipe_id, queue_id;
6595 struct amdgpu_ring *ring;
/* same ring_id decoding as gfx_v8_0_eop_irq() */
6598 me_id = (entry->ring_id & 0x0c) >> 2;
6599 pipe_id = (entry->ring_id & 0x03) >> 0;
6600 queue_id = (entry->ring_id & 0x70) >> 4;
6604 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6608 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6609 ring = &adev->gfx.compute_ring[i];
6610 if (ring->me == me_id && ring->pipe == pipe_id &&
6611 ring->queue == queue_id)
6612 drm_sched_fault(&ring->sched);
/*
 * gfx_v8_0_priv_reg_irq - handler for privileged-register-access faults.
 *
 * Logs the violation and forwards the IV entry to the common fault path,
 * which signals a scheduler fault on the offending ring.
 *
 * Returns 0 (interrupt handled).
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}
/*
 * gfx_v8_0_priv_inst_irq - handler for illegal-instruction faults.
 *
 * Logs the violation and forwards the IV entry to the common fault path,
 * which signals a scheduler fault on the offending ring.
 *
 * Returns 0 (interrupt handled).
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}
/*
 * gfx_v8_0_cp_ecc_error_irq - handler for CP EDC/ECC error interrupts.
 *
 * Only logs the event; no recovery is attempted here.
 *
 * Returns 0 (interrupt handled).
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* trailing newline added so the log line is properly terminated */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
/*
 * gfx_v8_0_parse_sq_irq - decode and report an SQ interrupt word.
 *
 * Dispatches on the ENCODING field of @ih_data: the "auto" encoding is a
 * general-purpose SQ event (overflow/timestamp/thread-trace flags, printed
 * via DRM_INFO); the "wave" encoding is an instruction trap or EDC/ECC
 * error, for which the SQ_EDC_INFO source register is read when possible.
 *
 * NOTE(review): the enclosing switch and several declarations are elided in
 * this view -- verify against the complete file.
 */
6644 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6646 u32 enc, se_id, sh_id, cu_id;
/* -1 means SQ_EDC_INFO could not be read (e.g. called from ISR context) */
6648 int sq_edc_source = -1;
6650 enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6651 se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
/* "auto" encoding: general-purpose SQ event, dump all status bits */
6655 DRM_INFO("SQ general purpose intr detected:"
6656 "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6657 "host_cmd_overflow %d, cmd_timestamp %d,"
6658 "reg_timestamp %d, thread_trace_buff_full %d,"
6659 "wlt %d, thread_trace %d.\n",
6661 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6662 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6663 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6664 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6665 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6666 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6667 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6668 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
/* "wave" encoding: locate the CU that raised the trap/error */
6674 cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6675 sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6678 * This function can be called either directly from ISR
6679 * or from BH in which case we can access SQ_EDC_INFO
/* GRBM index is shared state; serialize se/sh/cu selection */
6683 mutex_lock(&adev->grbm_idx_mutex);
6684 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6686 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
/* restore broadcast selection before releasing the mutex */
6688 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6689 mutex_unlock(&adev->grbm_idx_mutex);
6693 sprintf(type, "instruction intr");
6695 sprintf(type, "EDC/ECC error");
6699 "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6700 "trap %s, sq_ed_info.source %s.\n",
6701 type, se_id, sh_id, cu_id,
6702 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6703 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6704 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6705 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6706 (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6710 DRM_ERROR("SQ invalid encoding type\n.");
6714 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6717 struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6718 struct sq_work *sq_work = container_of(work, struct sq_work, work);
6720 gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6723 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6724 struct amdgpu_irq_src *source,
6725 struct amdgpu_iv_entry *entry)
6727 unsigned ih_data = entry->src_data[0];
6730 * Try to submit work so SQ_EDC_INFO can be accessed from
6731 * BH. If previous work submission hasn't finished yet
6732 * just print whatever info is possible directly from the ISR.
6734 if (work_pending(&adev->gfx.sq_work.work)) {
6735 gfx_v8_0_parse_sq_irq(adev, ih_data);
6737 adev->gfx.sq_work.ih_data = ih_data;
6738 schedule_work(&adev->gfx.sq_work.work);
/*
 * IP-level lifecycle callbacks for the GFX8 block (init/fini, suspend/
 * resume, idle/reset handling, clock- and power-gating control).
 * NOTE(review): some initializers (e.g. .name) and the closing brace are
 * not visible in this chunk.
 */
6744 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6746 .early_init = gfx_v8_0_early_init,
6747 .late_init = gfx_v8_0_late_init,
6748 .sw_init = gfx_v8_0_sw_init,
6749 .sw_fini = gfx_v8_0_sw_fini,
6750 .hw_init = gfx_v8_0_hw_init,
6751 .hw_fini = gfx_v8_0_hw_fini,
6752 .suspend = gfx_v8_0_suspend,
6753 .resume = gfx_v8_0_resume,
6754 .is_idle = gfx_v8_0_is_idle,
6755 .wait_for_idle = gfx_v8_0_wait_for_idle,
6756 .check_soft_reset = gfx_v8_0_check_soft_reset,
6757 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6758 .soft_reset = gfx_v8_0_soft_reset,
6759 .post_soft_reset = gfx_v8_0_post_soft_reset,
6760 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6761 .set_powergating_state = gfx_v8_0_set_powergating_state,
6762 .get_clockgating_state = gfx_v8_0_get_clockgating_state,
/*
 * Ring callbacks for the GFX (graphics) ring: pointer access, packet
 * emission (IBs, fences, VM flushes, context control) and ring/IB tests.
 * .emit_frame_size budgets the worst-case dword count for one frame.
 * NOTE(review): several initializers (e.g. .align_mask, parts of
 * .emit_frame_size, closing brace) are not visible in this chunk.
 */
6765 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6766 .type = AMDGPU_RING_TYPE_GFX,
6768 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6769 .support_64bit_ptrs = false,
6770 .get_rptr = gfx_v8_0_ring_get_rptr,
6771 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6772 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6773 .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6775 7 + /* PIPELINE_SYNC */
6776 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6777 12 + /* FENCE for VM_FLUSH */
6778 20 + /* GDS switch */
6779 4 + /* double SWITCH_BUFFER,
6780 the first COND_EXEC jump to the place just
6781 prior to this double SWITCH_BUFFER */
6789 12 + 12 + /* FENCE x2 */
6790 2, /* SWITCH_BUFFER */
6791 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6792 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6793 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6794 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6795 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6796 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6797 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6798 .test_ring = gfx_v8_0_ring_test_ring,
6799 .test_ib = gfx_v8_0_ring_test_ib,
6800 .insert_nop = amdgpu_ring_insert_nop,
6801 .pad_ib = amdgpu_ring_generic_pad_ib,
6802 .emit_switch_buffer = gfx_v8_ring_emit_sb,
6803 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6804 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6805 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6806 .emit_wreg = gfx_v8_0_ring_emit_wreg,
6807 .soft_recovery = gfx_v8_0_ring_soft_recovery,
/*
 * Ring callbacks for the compute (MEC) rings. Shares rptr access with the
 * GFX ring but uses compute-specific wptr handling, IB emission and fence
 * packets; no context-control/switch-buffer hooks (graphics-only concepts).
 * NOTE(review): some initializers (e.g. .align_mask, the .emit_frame_size
 * field name, closing brace) are not visible in this chunk.
 */
6810 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6811 .type = AMDGPU_RING_TYPE_COMPUTE,
6813 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6814 .support_64bit_ptrs = false,
6815 .get_rptr = gfx_v8_0_ring_get_rptr,
6816 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6817 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6819 20 + /* gfx_v8_0_ring_emit_gds_switch */
6820 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6821 5 + /* hdp_invalidate */
6822 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6823 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6824 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6825 .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6826 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6827 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6828 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6829 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6830 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6831 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6832 .test_ring = gfx_v8_0_ring_test_ring,
6833 .test_ib = gfx_v8_0_ring_test_ib,
6834 .insert_nop = amdgpu_ring_insert_nop,
6835 .pad_ib = amdgpu_ring_generic_pad_ib,
6836 .emit_wreg = gfx_v8_0_ring_emit_wreg,
/*
 * Ring callbacks for the KIQ (kernel interface queue). The KIQ is used for
 * register access and queue management, not user IB submission, so it has
 * no .emit_ib/.test_ib hooks but does provide .emit_rreg for register reads.
 * NOTE(review): some initializers (e.g. .align_mask, the .emit_frame_size
 * field name, closing brace) are not visible in this chunk.
 */
6839 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6840 .type = AMDGPU_RING_TYPE_KIQ,
6842 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6843 .support_64bit_ptrs = false,
6844 .get_rptr = gfx_v8_0_ring_get_rptr,
6845 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6846 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6848 20 + /* gfx_v8_0_ring_emit_gds_switch */
6849 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6850 5 + /* hdp_invalidate */
6851 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6852 17 + /* gfx_v8_0_ring_emit_vm_flush */
6853 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6854 .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6855 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6856 .test_ring = gfx_v8_0_ring_test_ring,
6857 .insert_nop = amdgpu_ring_insert_nop,
6858 .pad_ib = amdgpu_ring_generic_pad_ib,
6859 .emit_rreg = gfx_v8_0_ring_emit_rreg,
6860 .emit_wreg = gfx_v8_0_ring_emit_wreg,
6863 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6867 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6869 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6870 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6872 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6873 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
/* EOP (fence) interrupt source: per-ring enable + handler. */
6876 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6877 .set = gfx_v8_0_set_eop_interrupt_state,
6878 .process = gfx_v8_0_eop_irq,
/* Privileged register access fault interrupt source. */
6881 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6882 .set = gfx_v8_0_set_priv_reg_fault_state,
6883 .process = gfx_v8_0_priv_reg_irq,
/* Illegal instruction fault interrupt source. */
6886 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6887 .set = gfx_v8_0_set_priv_inst_fault_state,
6888 .process = gfx_v8_0_priv_inst_irq,
/* CP EDC/ECC error interrupt source. */
6891 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
6892 .set = gfx_v8_0_set_cp_ecc_int_state,
6893 .process = gfx_v8_0_cp_ecc_error_irq,
/* SQ (shader sequencer) interrupt source. */
6896 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
6897 .set = gfx_v8_0_set_sq_int_state,
6898 .process = gfx_v8_0_sq_irq,
6901 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6903 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6904 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6906 adev->gfx.priv_reg_irq.num_types = 1;
6907 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6909 adev->gfx.priv_inst_irq.num_types = 1;
6910 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6912 adev->gfx.cp_ecc_error_irq.num_types = 1;
6913 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
6915 adev->gfx.sq_irq.num_types = 1;
6916 adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
6919 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6921 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6924 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6926 /* init asci gds info */
6927 adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
6928 adev->gds.gws_size = 64;
6929 adev->gds.oa_size = 16;
6930 adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
6933 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6941 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6942 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6944 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6947 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6951 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6952 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6954 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6956 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
/*
 * gfx_v8_0_get_cu_info - populate adev->gfx.cu_info.
 *
 * Iterates every shader engine / shader array, applies the user CU disable
 * masks, reads back the active-CU bitmap per SE/SH, and derives the total
 * active CU count plus the "always on" (AO) CU mask. Fixed per-CU limits
 * (waves/SIMD, scratch slots, wavefront size, LDS) are filled in at the end.
 *
 * NOTE(review): loop-interior lines (counter/ao_bitmap/mask updates, the
 * APU ao_cu_num branch) are elided in this view -- verify against the
 * complete file.
 */
6959 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6961 int i, j, k, counter, active_cu_number = 0;
6962 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6963 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6964 unsigned disable_masks[4 * 2];
6967 memset(cu_info, 0, sizeof(*cu_info));
/* APUs use a different always-on CU budget than dGPUs */
6969 if (adev->flags & AMD_IS_APU)
6972 ao_cu_num = adev->gfx.config.max_cu_per_sh;
6974 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
/* GRBM index is shared state; serialize SE/SH selection */
6976 mutex_lock(&adev->grbm_idx_mutex);
6977 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6978 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6982 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6984 gfx_v8_0_set_user_cu_inactive_bitmap(
6985 adev, disable_masks[i * 2 + j]);
6986 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6987 cu_info->bitmap[i][j] = bitmap;
/* count active CUs; the first ao_cu_num of them are "always on" */
6989 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
6990 if (bitmap & mask) {
6991 if (counter < ao_cu_num)
6997 active_cu_number += counter;
6999 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7000 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
/* restore broadcast selection before releasing the mutex */
7003 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7004 mutex_unlock(&adev->grbm_idx_mutex);
7006 cu_info->number = active_cu_number;
7007 cu_info->ao_cu_mask = ao_cu_mask;
7008 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7009 cu_info->max_waves_per_simd = 10;
7010 cu_info->max_scratch_slots_per_cu = 32;
7011 cu_info->wave_front_size = 64;
7012 cu_info->lds_size = 64;
7015 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7017 .type = AMD_IP_BLOCK_TYPE_GFX,
7021 .funcs = &gfx_v8_0_ip_funcs,
7024 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7026 .type = AMD_IP_BLOCK_TYPE_GFX,
7030 .funcs = &gfx_v8_0_ip_funcs,
/*
 * gfx_v8_0_ring_emit_ce_meta - emit the CE (constant engine) metadata
 * payload into the CSA via a WRITE_DATA packet.
 *
 * The payload layout and destination offset depend on whether the device
 * (SR-IOV) supports chained IBs; cnt_ce is the WRITE_DATA dword count for
 * the chosen layout. ENGINE_SEL(2) targets the CE.
 *
 * NOTE(review): the ce_payload union declaration and branch braces are
 * elided in this view -- verify against the complete file.
 */
7033 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7035 uint64_t ce_payload_addr;
7038 struct vi_ce_ib_state regular;
7039 struct vi_ce_ib_state_chained_ib chained;
7042 if (ring->adev->virt.chained_ib_support) {
7043 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7044 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7045 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7047 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7048 offsetof(struct vi_gfx_meta_data, ce_payload);
7049 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
/* header + control dwords, then the 64-bit destination address */
7052 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7053 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7054 WRITE_DATA_DST_SEL(8) |
7056 WRITE_DATA_CACHE_POLICY(0));
7057 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7058 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7059 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7062 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7064 uint64_t de_payload_addr, gds_addr, csa_addr;
7067 struct vi_de_ib_state regular;
7068 struct vi_de_ib_state_chained_ib chained;
7071 csa_addr = amdgpu_csa_vaddr(ring->adev);
7072 gds_addr = csa_addr + 4096;
7073 if (ring->adev->virt.chained_ib_support) {
7074 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7075 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7076 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7077 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7079 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7080 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7081 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7082 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7085 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7086 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7087 WRITE_DATA_DST_SEL(8) |
7089 WRITE_DATA_CACHE_POLICY(0));
7090 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7091 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7092 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);