2 * Copyright 2014 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
31 #include "amdgpu_gfx.h"
33 #include "vi_structs.h"
35 #include "amdgpu_ucode.h"
36 #include "amdgpu_atombios.h"
37 #include "atombios_i2c.h"
38 #include "clearstate_vi.h"
40 #include "gmc/gmc_8_2_d.h"
41 #include "gmc/gmc_8_2_sh_mask.h"
43 #include "oss/oss_3_0_d.h"
44 #include "oss/oss_3_0_sh_mask.h"
46 #include "bif/bif_5_0_d.h"
47 #include "bif/bif_5_0_sh_mask.h"
48 #include "gca/gfx_8_0_d.h"
49 #include "gca/gfx_8_0_enum.h"
50 #include "gca/gfx_8_0_sh_mask.h"
52 #include "dce/dce_10_0_d.h"
53 #include "dce/dce_10_0_sh_mask.h"
55 #include "smu/smu_7_1_3_d.h"
57 #include "ivsrcid/ivsrcid_vislands30.h"
59 #define GFX8_NUM_GFX_RINGS 1
60 #define GFX8_MEC_HPD_SIZE 4096
62 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
63 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
64 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
65 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
67 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
68 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
69 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
70 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
71 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
72 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
73 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
74 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
75 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
77 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
78 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
79 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
80 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
81 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
82 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L
85 #define SET_BPM_SERDES_CMD 1
86 #define CLE_BPM_SERDES_CMD 0
88 /* BPM Register Address*/
90 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */
91 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */
92 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */
93 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */
94 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */
98 #define RLC_FormatDirectRegListLength 14
/* Per-VMID GDS register offsets, one row per VMID 0-15:
 * {GDS base, GDS size, GWS, OA}. Indexed by VMID when programming the
 * Global Data Share apertures for a VM.
 */
102 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
104 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
105 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
106 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
107 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
108 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
109 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
110 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
111 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
112 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
113 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
114 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
115 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
116 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
117 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
118 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
119 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
/* Tonga A11 golden settings: {register, AND mask, OR value} triplets,
 * applied by gfx_v8_0_init_golden_registers() via
 * amdgpu_device_program_register_sequence().
 */
122 static const u32 golden_settings_tonga_a11[] =
124 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
125 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
126 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
127 mmGB_GPU_ID, 0x0000000f, 0x00000000,
128 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
129 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
130 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
131 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
132 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
133 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
134 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
135 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
136 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
137 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
138 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
139 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/* Tonga common golden settings: raster config, GB_ADDR_CONFIG and SPI CU
 * reservation defaults. {register, AND mask, OR value} triplets.
 */
142 static const u32 tonga_golden_common_all[] =
144 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
145 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
146 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
147 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
148 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
149 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
150 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
151 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
/* Tonga MGCG/CGCG (medium-/coarse-grain clock gating) init sequence:
 * {register, AND mask, OR value} triplets programming the CGTT per-block
 * clock controls and the per-CU CGTS registers.
 */
154 static const u32 tonga_mgcg_cgcg_init[] =
156 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
157 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
158 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
159 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
160 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
161 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
162 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
163 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
164 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
165 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
166 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
167 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
168 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
169 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
170 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
171 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
172 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
173 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
174 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
175 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
176 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
177 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
178 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
179 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
180 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
181 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
182 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
183 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
184 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
185 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
186 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
187 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
188 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
189 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
190 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
191 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
192 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
193 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
194 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
195 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
196 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
197 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
198 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
199 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
200 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
201 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
202 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
203 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
204 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
205 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
206 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
207 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
208 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
209 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
210 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
211 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
212 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
213 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
214 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
215 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
216 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
217 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
218 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
219 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
220 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
221 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
222 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
223 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
224 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
225 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
226 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
227 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
228 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
229 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
230 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
/* VegaM A11 golden settings: {register, AND mask, OR value} triplets. */
233 static const u32 golden_settings_vegam_a11[] =
235 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
236 mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
237 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
238 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
239 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
240 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
241 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
242 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
243 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
244 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
245 mmSQ_CONFIG, 0x07f80000, 0x01180000,
246 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
247 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
248 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
249 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
250 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
251 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/* VegaM common golden settings: GB_ADDR_CONFIG and SPI CU reservation.
 * {register, AND mask, OR value} triplets.
 */
254 static const u32 vegam_golden_common_all[] =
256 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
257 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
258 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
259 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
260 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
261 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
/* Polaris11 A11 golden settings: {register, AND mask, OR value} triplets. */
264 static const u32 golden_settings_polaris11_a11[] =
266 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
267 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
268 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
269 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
270 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
271 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
272 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
273 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
274 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
275 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
276 mmSQ_CONFIG, 0x07f80000, 0x01180000,
277 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
278 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
279 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
280 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
281 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
282 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/* Polaris11 common golden settings; note GB_ADDR_CONFIG 0x22011002 matches
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN above. {register, AND mask, OR value}.
 */
285 static const u32 polaris11_golden_common_all[] =
287 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
288 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
289 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
290 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
291 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
292 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
/* Polaris10 A11 golden settings: {register, AND mask, OR value} triplets. */
295 static const u32 golden_settings_polaris10_a11[] =
297 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
298 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
299 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
300 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
301 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
302 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
303 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
304 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
305 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
306 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
307 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
308 mmSQ_CONFIG, 0x07f80000, 0x07180000,
309 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
310 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
311 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
312 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
313 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/* Polaris10 common golden settings: raster config, GB_ADDR_CONFIG and SPI
 * CU reservation. {register, AND mask, OR value} triplets.
 */
316 static const u32 polaris10_golden_common_all[] =
318 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
319 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
320 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
321 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
322 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
323 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
324 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
325 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
/* Fiji common golden settings: raster config, GB_ADDR_CONFIG, SPI CU
 * reservation and SPI_CONFIG_CNTL_1. {register, AND mask, OR value}.
 */
328 static const u32 fiji_golden_common_all[] =
330 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
331 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
332 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
333 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
334 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
335 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
336 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
337 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
338 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
339 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
/* Fiji A10 golden settings: {register, AND mask, OR value} triplets. */
342 static const u32 golden_settings_fiji_a10[] =
344 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
345 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
346 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
347 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
348 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
349 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
350 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
351 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
352 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
353 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
354 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/* Fiji MGCG/CGCG clock-gating init sequence. Same CGTT per-block layout as
 * the Tonga table but without per-CU CGTS entries.
 * {register, AND mask, OR value} triplets.
 */
357 static const u32 fiji_mgcg_cgcg_init[] =
359 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
360 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
361 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
362 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
363 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
364 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
365 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
366 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
367 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
368 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
369 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
370 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
371 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
372 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
373 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
374 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
375 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
376 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
377 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
378 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
379 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
380 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
381 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
382 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
383 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
384 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
385 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
386 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
387 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
388 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
389 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
390 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
391 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
392 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
393 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
/* Iceland/Topaz A11 golden settings: {register, AND mask, OR value}. */
396 static const u32 golden_settings_iceland_a11[] =
398 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
399 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
400 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
401 mmGB_GPU_ID, 0x0000000f, 0x00000000,
402 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
403 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
404 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
405 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
406 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
407 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
408 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
409 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
410 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
411 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
412 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
413 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
/* Iceland common golden settings; GB_ADDR_CONFIG 0x22010001 matches
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN above. {register, AND mask, OR value}.
 */
416 static const u32 iceland_golden_common_all[] =
418 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
419 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
420 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
421 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
422 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
423 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
424 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
425 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
/* Iceland MGCG/CGCG clock-gating init sequence: CGTT per-block clock
 * controls plus per-CU CGTS entries for CU0-CU5.
 * {register, AND mask, OR value} triplets.
 */
428 static const u32 iceland_mgcg_cgcg_init[] =
430 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
431 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
432 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
433 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
434 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
435 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
436 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
437 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
438 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
439 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
440 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
441 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
442 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
443 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
444 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
445 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
446 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
447 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
448 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
449 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
450 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
451 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
452 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
453 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
454 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
455 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
456 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
457 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
458 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
459 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
460 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
461 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
462 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
463 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
464 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
465 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
466 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
467 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
468 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
469 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
470 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
471 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
472 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
473 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
474 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
475 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
476 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
477 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
478 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
479 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
480 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
481 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
482 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
483 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
484 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
485 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
486 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
487 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
488 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
489 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
490 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
491 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
492 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
493 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
/* Carrizo A11 golden settings: {register, AND mask, OR value} triplets. */
496 static const u32 cz_golden_settings_a11[] =
498 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
499 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
500 mmGB_GPU_ID, 0x0000000f, 0x00000000,
501 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
502 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
503 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
504 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
505 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
506 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
507 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
508 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
509 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
/* Carrizo common golden settings; GB_ADDR_CONFIG 0x22010001 matches
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN above. {register, AND mask, OR value}.
 */
512 static const u32 cz_golden_common_all[] =
514 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
515 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
516 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
517 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
518 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
519 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
520 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
521 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
/* Carrizo MGCG/CGCG clock-gating init sequence: CGTT per-block clock
 * controls plus per-CU CGTS entries for CU0-CU7.
 * {register, AND mask, OR value} triplets.
 */
524 static const u32 cz_mgcg_cgcg_init[] =
526 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
527 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
528 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
529 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
530 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
531 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
532 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
533 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
534 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
535 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
536 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
537 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
538 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
539 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
540 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
541 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
542 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
543 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
544 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
545 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
546 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
547 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
548 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
549 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
550 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
551 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
552 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
553 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
554 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
555 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
556 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
557 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
558 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
559 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
560 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
561 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
562 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
563 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
564 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
565 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
566 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
567 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
568 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
569 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
570 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
571 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
572 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
573 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
574 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
575 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
576 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
577 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
578 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
579 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
580 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
581 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
582 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
583 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
584 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
585 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
586 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
587 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
588 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
589 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
590 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
591 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
592 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
593 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
594 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
595 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
596 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
597 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
598 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
599 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
600 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
/* Stoney A11 golden settings: {register, AND mask, OR value} triplets. */
603 static const u32 stoney_golden_settings_a11[] =
605 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
606 mmGB_GPU_ID, 0x0000000f, 0x00000000,
607 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
608 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
609 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
610 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
611 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
612 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
613 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
614 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
/* Stoney common golden settings: raster config, GB_ADDR_CONFIG and SPI CU
 * reservation. {register, AND mask, OR value} triplets.
 */
617 static const u32 stoney_golden_common_all[] =
619 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
620 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
621 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
622 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
623 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
624 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
625 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
626 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
/* Stoney MGCG/CGCG clock-gating init sequence (much shorter than the
 * discrete-GPU tables). {register, AND mask, OR value} triplets.
 */
629 static const u32 stoney_mgcg_cgcg_init[] =
631 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
632 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
633 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
634 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
635 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
/* Human-readable names for the SQ EDC (error detection and correction)
 * info source field, used when reporting EDC errors.
 * NOTE(review): the closing "};" of this array is not visible in this
 * extraction — presumably dropped along with other structural lines.
 */
639 static const char * const sq_edc_source_names[] = {
640 "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
641 "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
642 "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
643 "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
644 "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
645 "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
646 "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
649 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
650 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
651 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
652 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
653 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
654 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
655 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
656 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
/*
 * gfx_v8_0_init_golden_registers - program the ASIC-specific "golden"
 * register values (clock-gating init, golden settings, common settings)
 * for the detected asic_type via amdgpu_device_program_register_sequence().
 *
 * NOTE(review): this extraction has dropped the "case CHIP_*:" labels,
 * "break;" statements and braces from the switch body; the per-ASIC
 * grouping below is inferred from the table names and should be confirmed
 * against the full source.
 */
658 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
660 switch (adev->asic_type) {
/* Iceland/Topaz */
662 amdgpu_device_program_register_sequence(adev,
663 iceland_mgcg_cgcg_init,
664 ARRAY_SIZE(iceland_mgcg_cgcg_init));
665 amdgpu_device_program_register_sequence(adev,
666 golden_settings_iceland_a11,
667 ARRAY_SIZE(golden_settings_iceland_a11));
668 amdgpu_device_program_register_sequence(adev,
669 iceland_golden_common_all,
670 ARRAY_SIZE(iceland_golden_common_all));
/* Fiji (first-argument line of the first call appears truncated here) */
673 amdgpu_device_program_register_sequence(adev,
675 ARRAY_SIZE(fiji_mgcg_cgcg_init));
676 amdgpu_device_program_register_sequence(adev,
677 golden_settings_fiji_a10,
678 ARRAY_SIZE(golden_settings_fiji_a10));
679 amdgpu_device_program_register_sequence(adev,
680 fiji_golden_common_all,
681 ARRAY_SIZE(fiji_golden_common_all));
/* Tonga */
685 amdgpu_device_program_register_sequence(adev,
686 tonga_mgcg_cgcg_init,
687 ARRAY_SIZE(tonga_mgcg_cgcg_init));
688 amdgpu_device_program_register_sequence(adev,
689 golden_settings_tonga_a11,
690 ARRAY_SIZE(golden_settings_tonga_a11));
691 amdgpu_device_program_register_sequence(adev,
692 tonga_golden_common_all,
693 ARRAY_SIZE(tonga_golden_common_all));
/* VegaM */
696 amdgpu_device_program_register_sequence(adev,
697 golden_settings_vegam_a11,
698 ARRAY_SIZE(golden_settings_vegam_a11));
699 amdgpu_device_program_register_sequence(adev,
700 vegam_golden_common_all,
701 ARRAY_SIZE(vegam_golden_common_all));
/* Polaris11 */
705 amdgpu_device_program_register_sequence(adev,
706 golden_settings_polaris11_a11,
707 ARRAY_SIZE(golden_settings_polaris11_a11));
708 amdgpu_device_program_register_sequence(adev,
709 polaris11_golden_common_all,
710 ARRAY_SIZE(polaris11_golden_common_all));
/* Polaris10 */
713 amdgpu_device_program_register_sequence(adev,
714 golden_settings_polaris10_a11,
715 ARRAY_SIZE(golden_settings_polaris10_a11));
716 amdgpu_device_program_register_sequence(adev,
717 polaris10_golden_common_all,
718 ARRAY_SIZE(polaris10_golden_common_all));
719 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
/* Board-specific quirk: matched on PCI revision + subsystem IDs, it issues
 * two ATOM BIOS i2c transactions — presumably a workaround for specific
 * Polaris10 boards; confirm intent against the full source history.
 */
720 if (adev->pdev->revision == 0xc7 &&
721 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
722 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
723 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
724 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
725 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
/* Carrizo */
729 amdgpu_device_program_register_sequence(adev,
731 ARRAY_SIZE(cz_mgcg_cgcg_init));
732 amdgpu_device_program_register_sequence(adev,
733 cz_golden_settings_a11,
734 ARRAY_SIZE(cz_golden_settings_a11));
735 amdgpu_device_program_register_sequence(adev,
736 cz_golden_common_all,
737 ARRAY_SIZE(cz_golden_common_all));
/* Stoney */
740 amdgpu_device_program_register_sequence(adev,
741 stoney_mgcg_cgcg_init,
742 ARRAY_SIZE(stoney_mgcg_cgcg_init));
743 amdgpu_device_program_register_sequence(adev,
744 stoney_golden_settings_a11,
745 ARRAY_SIZE(stoney_golden_settings_a11));
746 amdgpu_device_program_register_sequence(adev,
747 stoney_golden_common_all,
748 ARRAY_SIZE(stoney_golden_common_all));
/*
 * gfx_v8_0_scratch_init - set up the GFX scratch-register allocator:
 * 8 registers starting at mmSCRATCH_REG0, with a free-bitmask of
 * (1 << num_reg) - 1 (all registers initially free).
 */
755 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
757 adev->gfx.scratch.num_reg = 8;
758 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
759 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
/*
 * gfx_v8_0_ring_test_ring - basic ring liveness test: write 0xCAFEDEAD to
 * a scratch register, submit a 3-dword SET_UCONFIG_REG packet that writes
 * 0xDEADBEEF to it, then poll (up to adev->usec_timeout iterations) until
 * the value appears. Frees the scratch register on exit.
 *
 * NOTE(review): local declarations (r, i, tmp, scratch), several error
 * checks/returns and closing braces are missing from this extraction;
 * confirm the full control flow against the original source.
 */
762 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
764 struct amdgpu_device *adev = ring->adev;
770 r = amdgpu_gfx_scratch_get(adev, &scratch);
/* Seed the scratch reg so we can tell whether the CP actually wrote it. */
774 WREG32(scratch, 0xCAFEDEAD);
775 r = amdgpu_ring_alloc(ring, 3);
777 goto error_free_scratch;
779 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
780 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
781 amdgpu_ring_write(ring, 0xDEADBEEF);
782 amdgpu_ring_commit(ring);
784 for (i = 0; i < adev->usec_timeout; i++) {
785 tmp = RREG32(scratch);
786 if (tmp == 0xDEADBEEF)
/* Timed out waiting for the CP write. */
791 if (i >= adev->usec_timeout)
795 amdgpu_gfx_scratch_free(adev, scratch);
/* Indirect-buffer sanity test: build a small IB containing a WRITE_DATA
 * packet that stores 0xDEADBEEF into a writeback (WB) slot, schedule it on
 * the ring, wait on the fence with the caller-supplied timeout, then check
 * the WB slot (pre-seeded with 0xCAFEDEAD) for the expected value. */
799 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
801 struct amdgpu_device *adev = ring->adev;
803 struct dma_fence *f = NULL;
810 r = amdgpu_device_wb_get(adev, &index);
814 gpu_addr = adev->wb.gpu_addr + (index * 4);
815 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
816 memset(&ib, 0, sizeof(ib));
817 r = amdgpu_ib_get(adev, NULL, 16, &ib);
/* WRITE_DATA: dst_sel=5 (memory), write-confirm, 64-bit destination addr */
821 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
822 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
823 ib.ptr[2] = lower_32_bits(gpu_addr);
824 ib.ptr[3] = upper_32_bits(gpu_addr);
825 ib.ptr[4] = 0xDEADBEEF;
828 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
832 r = dma_fence_wait_timeout(f, false, timeout);
840 tmp = adev->wb.wb[index];
841 if (tmp == 0xDEADBEEF)
847 amdgpu_ib_free(adev, &ib, NULL);
850 amdgpu_device_wb_free(adev, index);
/* Release all gfx firmware images (PFP/ME/CE/RLC/MEC) and NULL the
 * pointers.  MEC2 firmware is only released on ASICs that loaded it
 * (i.e. not Stoney and not Topaz); the pointer is cleared regardless.
 * Also frees the RLC register-list-format buffer allocated at init. */
855 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
857 release_firmware(adev->gfx.pfp_fw);
858 adev->gfx.pfp_fw = NULL;
859 release_firmware(adev->gfx.me_fw);
860 adev->gfx.me_fw = NULL;
861 release_firmware(adev->gfx.ce_fw);
862 adev->gfx.ce_fw = NULL;
863 release_firmware(adev->gfx.rlc_fw);
864 adev->gfx.rlc_fw = NULL;
865 release_firmware(adev->gfx.mec_fw);
866 adev->gfx.mec_fw = NULL;
867 if ((adev->asic_type != CHIP_STONEY) &&
868 (adev->asic_type != CHIP_TOPAZ))
869 release_firmware(adev->gfx.mec2_fw);
870 adev->gfx.mec2_fw = NULL;
872 kfree(adev->gfx.rlc.register_list_format);
/* Load and validate all gfx microcode images for the detected ASIC
 * (PFP, ME, CE, RLC, MEC, and MEC2 where applicable), parse their
 * headers into adev->gfx.*, and register them in adev->firmware.ucode
 * while accumulating the total firmware size.
 *
 * Polaris parts attempt a primary firmware name first and fall back to
 * an alternate name on -ENOENT (actual file names are elided in this
 * deblobbed tree).  On any failure the error path at the bottom releases
 * every image acquired so far. */
875 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
877 const char *chip_name;
880 struct amdgpu_firmware_info *info = NULL;
881 const struct common_firmware_header *header = NULL;
882 const struct gfx_firmware_header_v1_0 *cp_hdr;
883 const struct rlc_firmware_header_v2_0 *rlc_hdr;
884 unsigned int *tmp = NULL, i;
/* pick the firmware name prefix for this ASIC */
888 switch (adev->asic_type) {
896 chip_name = "carrizo";
902 chip_name = "stoney";
905 chip_name = "polaris10";
908 chip_name = "polaris11";
911 chip_name = "polaris12";
/* --- PFP firmware --- */
920 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
921 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
922 err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
923 if (err == -ENOENT) {
924 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
925 err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
928 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
929 err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
933 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
936 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
937 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
938 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* --- ME firmware --- */
940 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
941 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
942 err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
943 if (err == -ENOENT) {
944 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
945 err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
948 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
949 err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
953 err = amdgpu_ucode_validate(adev->gfx.me_fw);
956 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
957 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
959 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* --- CE firmware --- */
961 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
962 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
963 err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
964 if (err == -ENOENT) {
965 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
966 err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
969 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
970 err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
974 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
977 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
978 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
979 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
982 * Support for MCBP/Virtualization in combination with chained IBs is
983 * formal released on feature version #46
985 if (adev->gfx.ce_feature_version >= 46 &&
986 adev->gfx.pfp_feature_version >= 46) {
987 adev->virt.chained_ib_support = true;
988 DRM_INFO("Chained IB support enabled!\n");
990 adev->virt.chained_ib_support = false;
/* --- RLC firmware (no Polaris fallback name here) --- */
992 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
993 err = reject_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
996 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
997 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
998 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
999 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
/* copy RLC save/restore layout fields out of the v2.0 header */
1001 adev->gfx.rlc.save_and_restore_offset =
1002 le32_to_cpu(rlc_hdr->save_and_restore_offset);
1003 adev->gfx.rlc.clear_state_descriptor_offset =
1004 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1005 adev->gfx.rlc.avail_scratch_ram_locations =
1006 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1007 adev->gfx.rlc.reg_restore_list_size =
1008 le32_to_cpu(rlc_hdr->reg_restore_list_size);
1009 adev->gfx.rlc.reg_list_format_start =
1010 le32_to_cpu(rlc_hdr->reg_list_format_start);
1011 adev->gfx.rlc.reg_list_format_separate_start =
1012 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1013 adev->gfx.rlc.starting_offsets_start =
1014 le32_to_cpu(rlc_hdr->starting_offsets_start);
1015 adev->gfx.rlc.reg_list_format_size_bytes =
1016 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1017 adev->gfx.rlc.reg_list_size_bytes =
1018 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
/* one allocation holds both the format list and the restore list;
 * register_restore points into the tail of register_list_format */
1020 adev->gfx.rlc.register_list_format =
1021 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1022 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1024 if (!adev->gfx.rlc.register_list_format) {
1029 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1030 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1031 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1032 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1034 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1036 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1037 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1038 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1039 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
/* --- MEC firmware --- */
1041 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1042 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1043 err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1044 if (err == -ENOENT) {
1045 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1046 err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1049 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1050 err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1054 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1057 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1058 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1059 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* --- MEC2 firmware: only on ASICs other than Stoney and Topaz --- */
1061 if ((adev->asic_type != CHIP_STONEY) &&
1062 (adev->asic_type != CHIP_TOPAZ)) {
1063 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1064 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1065 err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1066 if (err == -ENOENT) {
1067 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1068 err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1071 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1072 err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1075 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1078 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1079 adev->gfx.mec2_fw->data;
1080 adev->gfx.mec2_fw_version =
1081 le32_to_cpu(cp_hdr->header.ucode_version);
1082 adev->gfx.mec2_feature_version =
1083 le32_to_cpu(cp_hdr->ucode_feature_version);
1086 adev->gfx.mec2_fw = NULL;
/* register each image with the common ucode table and tally sizes */
1090 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1091 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1092 info->fw = adev->gfx.pfp_fw;
1093 header = (const struct common_firmware_header *)info->fw->data;
1094 adev->firmware.fw_size +=
1095 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1097 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1098 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1099 info->fw = adev->gfx.me_fw;
1100 header = (const struct common_firmware_header *)info->fw->data;
1101 adev->firmware.fw_size +=
1102 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1104 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1105 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1106 info->fw = adev->gfx.ce_fw;
1107 header = (const struct common_firmware_header *)info->fw->data;
1108 adev->firmware.fw_size +=
1109 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1111 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1112 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1113 info->fw = adev->gfx.rlc_fw;
1114 header = (const struct common_firmware_header *)info->fw->data;
1115 adev->firmware.fw_size +=
1116 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1118 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1119 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1120 info->fw = adev->gfx.mec_fw;
1121 header = (const struct common_firmware_header *)info->fw->data;
1122 adev->firmware.fw_size +=
1123 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1125 /* we need account JT in */
1126 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1127 adev->firmware.fw_size +=
1128 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1130 if (amdgpu_sriov_vf(adev)) {
1131 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1132 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1133 info->fw = adev->gfx.mec_fw;
/* NOTE(review): le32_to_cpu() applied to the host-order constant
 * 64 * PAGE_SIZE looks wrong (it is a no-op on little-endian only);
 * the intent is presumably just 64 pages — confirm upstream. */
1134 adev->firmware.fw_size +=
1135 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1138 if (adev->gfx.mec2_fw) {
1139 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1140 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1141 info->fw = adev->gfx.mec2_fw;
1142 header = (const struct common_firmware_header *)info->fw->data;
1143 adev->firmware.fw_size +=
1144 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
/* error path: release everything acquired above */
1150 "gfx8: Failed to load firmware \"%s\"\n",
1152 release_firmware(adev->gfx.pfp_fw);
1153 adev->gfx.pfp_fw = NULL;
1154 release_firmware(adev->gfx.me_fw);
1155 adev->gfx.me_fw = NULL;
1156 release_firmware(adev->gfx.ce_fw);
1157 adev->gfx.ce_fw = NULL;
1158 release_firmware(adev->gfx.rlc_fw);
1159 adev->gfx.rlc_fw = NULL;
1160 release_firmware(adev->gfx.mec_fw);
1161 adev->gfx.mec_fw = NULL;
1162 release_firmware(adev->gfx.mec2_fw);
1163 adev->gfx.mec2_fw = NULL;
/* Emit the clear-state buffer (CSB) packet stream into @buffer:
 * preamble begin, context control, all SECT_CONTEXT register extents
 * from rlc.cs_data, the PA_SC_RASTER_CONFIG pair, preamble end, and a
 * final CLEAR_STATE packet.  All dwords are stored little-endian. */
1168 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1169 volatile u32 *buffer)
1172 const struct cs_section_def *sect = NULL;
1173 const struct cs_extent_def *ext = NULL;
1175 if (adev->gfx.rlc.cs_data == NULL)
1180 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1181 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1183 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1184 buffer[count++] = cpu_to_le32(0x80000000);
1185 buffer[count++] = cpu_to_le32(0x80000000);
/* copy every context-section register extent from the cs_data tables */
1187 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1188 for (ext = sect->section; ext->extent != NULL; ++ext) {
1189 if (sect->id == SECT_CONTEXT) {
1191 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1192 buffer[count++] = cpu_to_le32(ext->reg_index -
1193 PACKET3_SET_CONTEXT_REG_START);
1194 for (i = 0; i < ext->reg_count; i++)
1195 buffer[count++] = cpu_to_le32(ext->extent[i]);
1202 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1203 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1204 PACKET3_SET_CONTEXT_REG_START);
1205 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1206 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1208 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1209 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1211 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1212 buffer[count++] = cpu_to_le32(0);
/* Number of CP jump tables; Carrizo gets a different count than the
 * other ASICs (return values elided from this extraction). */
1215 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1217 if (adev->asic_type == CHIP_CARRIZO)
/* RLC setup: point cs_data at the VI clear-state tables, create the
 * clear-state buffer, and on Carrizo/Stoney additionally allocate the
 * CP table (jump tables + 64KB GDS backup). */
1223 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1225 const struct cs_section_def *cs_data;
1228 adev->gfx.rlc.cs_data = vi_cs_data;
1230 cs_data = adev->gfx.rlc.cs_data;
1233 /* init clear state block */
1234 r = amdgpu_gfx_rlc_init_csb(adev);
1239 if ((adev->asic_type == CHIP_CARRIZO) ||
1240 (adev->asic_type == CHIP_STONEY)) {
1241 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1242 r = amdgpu_gfx_rlc_init_cpt(adev);
/* Reserve and pin the clear-state BO in VRAM, caching its GPU address. */
1250 static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev)
1254 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1255 if (unlikely(r != 0))
1258 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1259 AMDGPU_GEM_DOMAIN_VRAM);
1261 adev->gfx.rlc.clear_state_gpu_addr =
1262 amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1264 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
/* Unpin the clear-state BO; no-op if it was never created, and the
 * unpin is skipped if the reserve fails. */
1269 static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev)
1273 if (!adev->gfx.rlc.clear_state_obj)
1276 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1277 if (likely(r == 0)) {
1278 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1279 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
/* Free the MEC HPD EOP buffer object. */
1283 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1285 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
/* MEC setup: claim the compute queues this driver will use, then create,
 * map, and zero a VRAM buffer holding one GFX8_MEC_HPD_SIZE HPD/EOP
 * region per compute ring. */
1288 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1292 size_t mec_hpd_size;
1294 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1296 /* take ownership of the relevant compute queues */
1297 amdgpu_gfx_compute_queue_acquire(adev);
1299 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1301 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1302 AMDGPU_GEM_DOMAIN_VRAM,
1303 &adev->gfx.mec.hpd_eop_obj,
1304 &adev->gfx.mec.hpd_eop_gpu_addr,
1307 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1311 memset(hpd, 0, mec_hpd_size);
1313 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1314 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
/* Raw GCN machine code for the compute shader used by the EDC GPR
 * workaround: initializes a range of VGPRs, then a wait + end-of-program
 * (0xbf8a0000 = s_barrier-family wait, 0xbf810000 = s_endpgm). */
1319 static const u32 vgpr_init_compute_shader[] =
1321 0x7e000209, 0x7e020208,
1322 0x7e040207, 0x7e060206,
1323 0x7e080205, 0x7e0a0204,
1324 0x7e0c0203, 0x7e0e0202,
1325 0x7e100201, 0x7e120200,
1326 0x7e140209, 0x7e160208,
1327 0x7e180207, 0x7e1a0206,
1328 0x7e1c0205, 0x7e1e0204,
1329 0x7e200203, 0x7e220202,
1330 0x7e240201, 0x7e260200,
1331 0x7e280209, 0x7e2a0208,
1332 0x7e2c0207, 0x7e2e0206,
1333 0x7e300205, 0x7e320204,
1334 0x7e340203, 0x7e360202,
1335 0x7e380201, 0x7e3a0200,
1336 0x7e3c0209, 0x7e3e0208,
1337 0x7e400207, 0x7e420206,
1338 0x7e440205, 0x7e460204,
1339 0x7e480203, 0x7e4a0202,
1340 0x7e4c0201, 0x7e4e0200,
1341 0x7e500209, 0x7e520208,
1342 0x7e540207, 0x7e560206,
1343 0x7e580205, 0x7e5a0204,
1344 0x7e5c0203, 0x7e5e0202,
1345 0x7e600201, 0x7e620200,
1346 0x7e640209, 0x7e660208,
1347 0x7e680207, 0x7e6a0206,
1348 0x7e6c0205, 0x7e6e0204,
1349 0x7e700203, 0x7e720202,
1350 0x7e740201, 0x7e760200,
1351 0x7e780209, 0x7e7a0208,
1352 0x7e7c0207, 0x7e7e0206,
1353 0xbf8a0000, 0xbf810000,
/* Raw GCN machine code for the SGPR-initialization compute shader used
 * by the EDC GPR workaround (ends with wait + s_endpgm, zero-padded). */
1356 static const u32 sgpr_init_compute_shader[] =
1358 0xbe8a0100, 0xbe8c0102,
1359 0xbe8e0104, 0xbe900106,
1360 0xbe920108, 0xbe940100,
1361 0xbe960102, 0xbe980104,
1362 0xbe9a0106, 0xbe9c0108,
1363 0xbe9e0100, 0xbea00102,
1364 0xbea20104, 0xbea40106,
1365 0xbea60108, 0xbea80100,
1366 0xbeaa0102, 0xbeac0104,
1367 0xbeae0106, 0xbeb00108,
1368 0xbeb20100, 0xbeb40102,
1369 0xbeb60104, 0xbeb80106,
1370 0xbeba0108, 0xbebc0100,
1371 0xbebe0102, 0xbec00104,
1372 0xbec20106, 0xbec40108,
1373 0xbec60100, 0xbec80102,
1374 0xbee60004, 0xbee70005,
1375 0xbeea0006, 0xbeeb0007,
1376 0xbee80008, 0xbee90009,
1377 0xbefc0000, 0xbf8a0000,
1378 0xbf810000, 0x00000000,
/* Register/value pairs programmed before the VGPR-init dispatch of the
 * EDC workaround (consumed two entries at a time as SET_SH_REG writes). */
1381 static const u32 vgpr_init_regs[] =
1383 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1384 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1385 mmCOMPUTE_NUM_THREAD_X, 256*4,
1386 mmCOMPUTE_NUM_THREAD_Y, 1,
1387 mmCOMPUTE_NUM_THREAD_Z, 1,
1388 mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1389 mmCOMPUTE_PGM_RSRC2, 20,
1390 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1391 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1392 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1393 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1394 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1395 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1396 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1397 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1398 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1399 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/* Register/value pairs for the first SGPR-init dispatch; targets the
 * lower SE mask (0x0f) of COMPUTE_STATIC_THREAD_MGMT_SE0. */
1402 static const u32 sgpr1_init_regs[] =
1404 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1405 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1406 mmCOMPUTE_NUM_THREAD_X, 256*5,
1407 mmCOMPUTE_NUM_THREAD_Y, 1,
1408 mmCOMPUTE_NUM_THREAD_Z, 1,
1409 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1410 mmCOMPUTE_PGM_RSRC2, 20,
1411 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1412 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1413 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1414 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1415 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1416 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1417 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1418 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1419 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1420 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/* Register/value pairs for the second SGPR-init dispatch; same as
 * sgpr1_init_regs but with the upper SE mask (0xf0). */
1423 static const u32 sgpr2_init_regs[] =
1425 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1426 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1427 mmCOMPUTE_NUM_THREAD_X, 256*5,
1428 mmCOMPUTE_NUM_THREAD_Y, 1,
1429 mmCOMPUTE_NUM_THREAD_Z, 1,
1430 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1431 mmCOMPUTE_PGM_RSRC2, 20,
1432 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1433 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1434 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1435 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1436 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1437 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1438 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1439 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1440 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1441 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/* SEC/DED error-counter registers read back (to clear them) at the end
 * of the EDC GPR workaround. */
1444 static const u32 sec_ded_counter_registers[] =
1447 mmCPC_EDC_SCRATCH_CNT,
1448 mmCPC_EDC_UCODE_CNT,
1455 mmDC_EDC_CSINVOC_CNT,
1456 mmDC_EDC_RESTORE_CNT,
1462 mmSQC_ATC_EDC_GATCL1_CNT,
1468 mmTCP_ATC_EDC_GATCL1_CNT,
/* EDC GPR workaround (Carrizo only): build a single IB that performs
 * three compute dispatches — one VGPR-init and two SGPR-init shaders —
 * to put all GPRs in a known state, run it on compute ring 0, then
 * enable EDC (DED_MODE/PROP_FED in GB_EDC_MODE, DIS_EDC cleared in
 * CC_GC_EDC_CONFIG) and read back the SEC/DED counters to clear them. */
1473 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1475 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1476 struct amdgpu_ib ib;
1477 struct dma_fence *f = NULL;
1480 unsigned total_size, vgpr_offset, sgpr_offset;
1483 /* only supported on CZ */
1484 if (adev->asic_type != CHIP_CARRIZO)
1487 /* bail if the compute ring is not ready */
1488 if (!ring->sched.ready)
1491 tmp = RREG32(mmGB_EDC_MODE);
1492 WREG32(mmGB_EDC_MODE, 0);
/* size the command stream: 3 dwords per reg pair plus dispatch packets,
 * then append the two shader binaries at 256-byte-aligned offsets */
1495 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1497 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1499 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1500 total_size = ALIGN(total_size, 256);
1501 vgpr_offset = total_size;
1502 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1503 sgpr_offset = total_size;
1504 total_size += sizeof(sgpr_init_compute_shader);
1506 /* allocate an indirect buffer to put the commands in */
1507 memset(&ib, 0, sizeof(ib));
1508 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1510 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1514 /* load the compute shaders */
1515 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1516 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1518 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1519 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1521 /* init the ib length to 0 */
/* ---- dispatch #1: VGPR init ---- */
1525 /* write the register state for the compute dispatch */
1526 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1527 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1528 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1529 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1531 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1532 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1533 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1534 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1535 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1536 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1538 /* write dispatch packet */
1539 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1540 ib.ptr[ib.length_dw++] = 8; /* x */
1541 ib.ptr[ib.length_dw++] = 1; /* y */
1542 ib.ptr[ib.length_dw++] = 1; /* z */
1543 ib.ptr[ib.length_dw++] =
1544 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1546 /* write CS partial flush packet */
1547 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1548 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* ---- dispatch #2: SGPR init, lower SEs ---- */
1551 /* write the register state for the compute dispatch */
1552 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1553 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1554 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1555 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1557 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1558 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1559 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1560 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1561 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1562 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1564 /* write dispatch packet */
1565 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1566 ib.ptr[ib.length_dw++] = 8; /* x */
1567 ib.ptr[ib.length_dw++] = 1; /* y */
1568 ib.ptr[ib.length_dw++] = 1; /* z */
1569 ib.ptr[ib.length_dw++] =
1570 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1572 /* write CS partial flush packet */
1573 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1574 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* ---- dispatch #3: SGPR init, upper SEs ---- */
1577 /* write the register state for the compute dispatch */
1578 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1579 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1580 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1581 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1583 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1584 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1585 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1586 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1587 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1588 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1590 /* write dispatch packet */
1591 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1592 ib.ptr[ib.length_dw++] = 8; /* x */
1593 ib.ptr[ib.length_dw++] = 1; /* y */
1594 ib.ptr[ib.length_dw++] = 1; /* z */
1595 ib.ptr[ib.length_dw++] =
1596 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1598 /* write CS partial flush packet */
1599 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1600 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1602 /* shedule the ib on the ring */
1603 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1605 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1609 /* wait for the GPU to finish processing the IB */
1610 r = dma_fence_wait(f, false);
1612 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
/* re-enable EDC with DED double-error mode and FED propagation */
1616 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1617 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1618 WREG32(mmGB_EDC_MODE, tmp);
1620 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1621 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1622 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1625 /* read back registers to clear the counters */
1626 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1627 RREG32(sec_ded_counter_registers[i]);
1630 amdgpu_ib_free(adev, &ib, NULL);
/* Populate adev->gfx.config with per-ASIC shader-engine/pipe/CU limits
 * and FIFO sizes, choose the golden GB_ADDR_CONFIG, then derive memory
 * row size: on APUs from the fused DIMM address-map registers, on dGPUs
 * from MC_ARB_RAMCFG.NOOFCOLS.  The resulting row size is folded back
 * into gb_addr_config's ROW_SIZE field. */
1636 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1639 u32 mc_shared_chmap, mc_arb_ramcfg;
1640 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1644 switch (adev->asic_type) {
/* Topaz */
1646 adev->gfx.config.max_shader_engines = 1;
1647 adev->gfx.config.max_tile_pipes = 2;
1648 adev->gfx.config.max_cu_per_sh = 6;
1649 adev->gfx.config.max_sh_per_se = 1;
1650 adev->gfx.config.max_backends_per_se = 2;
1651 adev->gfx.config.max_texture_channel_caches = 2;
1652 adev->gfx.config.max_gprs = 256;
1653 adev->gfx.config.max_gs_threads = 32;
1654 adev->gfx.config.max_hw_contexts = 8;
1656 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1657 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1658 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1659 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1660 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
/* Fiji-class: 4 SEs, 16 pipes */
1663 adev->gfx.config.max_shader_engines = 4;
1664 adev->gfx.config.max_tile_pipes = 16;
1665 adev->gfx.config.max_cu_per_sh = 16;
1666 adev->gfx.config.max_sh_per_se = 1;
1667 adev->gfx.config.max_backends_per_se = 4;
1668 adev->gfx.config.max_texture_channel_caches = 16;
1669 adev->gfx.config.max_gprs = 256;
1670 adev->gfx.config.max_gs_threads = 32;
1671 adev->gfx.config.max_hw_contexts = 8;
1673 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1674 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1675 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1676 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1677 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* Polaris11/12: SE/CU topology comes from atombios */
1679 case CHIP_POLARIS11:
1680 case CHIP_POLARIS12:
1681 ret = amdgpu_atombios_get_gfx_info(adev);
1684 adev->gfx.config.max_gprs = 256;
1685 adev->gfx.config.max_gs_threads = 32;
1686 adev->gfx.config.max_hw_contexts = 8;
1688 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1689 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1690 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1691 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1692 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1694 case CHIP_POLARIS10:
1696 ret = amdgpu_atombios_get_gfx_info(adev);
1699 adev->gfx.config.max_gprs = 256;
1700 adev->gfx.config.max_gs_threads = 32;
1701 adev->gfx.config.max_hw_contexts = 8;
1703 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1704 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1705 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1706 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1707 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* Tonga-class */
1710 adev->gfx.config.max_shader_engines = 4;
1711 adev->gfx.config.max_tile_pipes = 8;
1712 adev->gfx.config.max_cu_per_sh = 8;
1713 adev->gfx.config.max_sh_per_se = 1;
1714 adev->gfx.config.max_backends_per_se = 2;
1715 adev->gfx.config.max_texture_channel_caches = 8;
1716 adev->gfx.config.max_gprs = 256;
1717 adev->gfx.config.max_gs_threads = 32;
1718 adev->gfx.config.max_hw_contexts = 8;
1720 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1721 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1722 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1723 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1724 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* Carrizo (APU) */
1727 adev->gfx.config.max_shader_engines = 1;
1728 adev->gfx.config.max_tile_pipes = 2;
1729 adev->gfx.config.max_sh_per_se = 1;
1730 adev->gfx.config.max_backends_per_se = 2;
1731 adev->gfx.config.max_cu_per_sh = 8;
1732 adev->gfx.config.max_texture_channel_caches = 2;
1733 adev->gfx.config.max_gprs = 256;
1734 adev->gfx.config.max_gs_threads = 32;
1735 adev->gfx.config.max_hw_contexts = 8;
1737 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1738 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1739 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1740 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1741 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
/* Stoney (APU): smallest config — 3 CUs, 16 GS threads */
1744 adev->gfx.config.max_shader_engines = 1;
1745 adev->gfx.config.max_tile_pipes = 2;
1746 adev->gfx.config.max_sh_per_se = 1;
1747 adev->gfx.config.max_backends_per_se = 1;
1748 adev->gfx.config.max_cu_per_sh = 3;
1749 adev->gfx.config.max_texture_channel_caches = 2;
1750 adev->gfx.config.max_gprs = 256;
1751 adev->gfx.config.max_gs_threads = 16;
1752 adev->gfx.config.max_hw_contexts = 8;
1754 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1755 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1756 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1757 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1758 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
/* default/fallback config */
1761 adev->gfx.config.max_shader_engines = 2;
1762 adev->gfx.config.max_tile_pipes = 4;
1763 adev->gfx.config.max_cu_per_sh = 2;
1764 adev->gfx.config.max_sh_per_se = 1;
1765 adev->gfx.config.max_backends_per_se = 2;
1766 adev->gfx.config.max_texture_channel_caches = 4;
1767 adev->gfx.config.max_gprs = 256;
1768 adev->gfx.config.max_gs_threads = 32;
1769 adev->gfx.config.max_hw_contexts = 8;
1771 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1772 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1773 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1774 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1775 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1779 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1780 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1781 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1783 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1784 adev->gfx.config.mem_max_burst_length_bytes = 256;
1785 if (adev->flags & AMD_IS_APU) {
1786 /* Get memory bank mapping mode. */
1787 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1788 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1789 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1791 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1792 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1793 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1795 /* Validate settings in case only one DIMM installed. */
1796 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1797 dimm00_addr_map = 0;
1798 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1799 dimm01_addr_map = 0;
1800 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1801 dimm10_addr_map = 0;
1802 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1803 dimm11_addr_map = 0;
1805 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1806 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1807 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1808 adev->gfx.config.mem_row_size_in_kb = 2;
1810 adev->gfx.config.mem_row_size_in_kb = 1;
/* dGPU: derive row size from number of columns, capped at 4KB */
1812 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1813 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1814 if (adev->gfx.config.mem_row_size_in_kb > 4)
1815 adev->gfx.config.mem_row_size_in_kb = 4;
1818 adev->gfx.config.shader_engine_tile_size = 32;
1819 adev->gfx.config.num_gpus = 1;
1820 adev->gfx.config.multi_gpu_tile_size = 64;
1822 /* fix up row size */
1823 switch (adev->gfx.config.mem_row_size_in_kb) {
1826 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1829 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1832 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1835 adev->gfx.config.gb_addr_config = gb_addr_config;
1840 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1841 int mec, int pipe, int queue)
1845 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1847 ring = &adev->gfx.compute_ring[ring_id];
1852 ring->queue = queue;
1854 ring->ring_obj = NULL;
1855 ring->use_doorbell = true;
1856 ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1857 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1858 + (ring_id * GFX8_MEC_HPD_SIZE);
1859 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1861 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1862 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1865 /* type-2 packets are deprecated on MEC, use type-3 instead */
1866 r = amdgpu_ring_init(adev, ring, 1024,
1867 &adev->gfx.eop_irq, irq_type);
1875 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
/*
 * gfx_v8_0_sw_init - software-side setup for the GFX v8 IP block.
 *
 * @handle: opaque IP-block handle, actually a struct amdgpu_device *.
 *
 * Registers the CP/SQ interrupt sources, loads GFX microcode, allocates the
 * RLC and MEC buffer objects, creates the gfx and compute rings plus the KIQ
 * ring, allocates per-queue MQDs, and finally runs early GPU config readout.
 * Returns 0 on success or a negative error code.
 *
 * NOTE(review): this extract is elided — braces, `break`s and the
 * `if (r) return r;` error checks between the visible lines are missing
 * from this view; comments below describe only what is visible.
 */
1877 static int gfx_v8_0_sw_init(void *handle)
1879 int i, j, k, r, ring_id;
1880 struct amdgpu_ring *ring;
1881 struct amdgpu_kiq *kiq;
1882 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* MEC topology depends on the ASIC: Polaris parts get two MEC engines,
 * the (elided) default case gets one; all use 4 pipes x 8 queues. */
1884 switch (adev->asic_type) {
1888 case CHIP_POLARIS10:
1889 case CHIP_POLARIS11:
1890 case CHIP_POLARIS12:
1892 adev->gfx.mec.num_mec = 2;
1897 adev->gfx.mec.num_mec = 1;
1901 adev->gfx.mec.num_pipe_per_mec = 4;
1902 adev->gfx.mec.num_queue_per_pipe = 8;
/* CP end-of-pipe (fence/EOP) interrupt, shared by gfx and compute rings */
1905 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1909 /* Privileged reg */
1910 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1911 &adev->gfx.priv_reg_irq);
1915 /* Privileged inst */
1916 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1917 &adev->gfx.priv_inst_irq);
1921 /* Add CP EDC/ECC irq */
1922 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1923 &adev->gfx.cp_ecc_error_irq);
1927 /* SQ interrupts. */
1928 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1931 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
/* SQ messages are handled in process context via this work item */
1935 INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1937 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1939 gfx_v8_0_scratch_init(adev);
/* firmware must be loaded before RLC/MEC BOs can be sized and filled */
1941 r = gfx_v8_0_init_microcode(adev);
1943 DRM_ERROR("Failed to load gfx firmware!\n");
1947 r = adev->gfx.rlc.funcs->init(adev);
1949 DRM_ERROR("Failed to init rlc BOs!\n");
1953 r = gfx_v8_0_mec_init(adev);
1955 DRM_ERROR("Failed to init MEC BOs!\n");
1959 /* set up the gfx ring */
1960 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1961 ring = &adev->gfx.gfx_ring[i];
1962 ring->ring_obj = NULL;
1963 sprintf(ring->name, "gfx");
1964 /* no gfx doorbells on iceland */
1965 if (adev->asic_type != CHIP_TOPAZ) {
1966 ring->use_doorbell = true;
1967 ring->doorbell_index = adev->doorbell_index.gfx_ring0;
1970 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
1971 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
1977 /* set up the compute queues - allocate horizontally across pipes */
/* loop order (mec, queue, pipe) spreads consecutive ring_ids across pipes */
1979 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1980 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1981 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1982 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1985 r = gfx_v8_0_compute_ring_init(adev,
/* KIQ (kernel interface queue) gets its own HPD slice and ring */
1996 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
1998 DRM_ERROR("Failed to init KIQ BOs!\n");
2002 kiq = &adev->gfx.kiq;
2003 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2007 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2008 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
/* constant engine RAM size for this IP version */
2012 adev->gfx.ce_ram_size = 0x8000;
2014 r = gfx_v8_0_gpu_early_init(adev);
/*
 * gfx_v8_0_sw_fini - tear down everything gfx_v8_0_sw_init() created.
 *
 * @handle: opaque IP-block handle, actually a struct amdgpu_device *.
 *
 * Releases resources in roughly the reverse order of sw_init: rings first,
 * then MQDs, KIQ, MEC and RLC buffer objects, and finally the microcode.
 *
 * NOTE(review): this extract is elided — the loop index declaration,
 * closing braces and final return are missing from this view.
 */
2021 static int gfx_v8_0_sw_fini(void *handle)
2023 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* destroy all gfx and compute rings created in sw_init */
2026 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2027 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2028 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2029 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
/* free per-queue MQDs, then the KIQ ring and its backing objects */
2031 amdgpu_gfx_mqd_sw_fini(adev);
2032 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2033 amdgpu_gfx_kiq_fini(adev);
2035 gfx_v8_0_mec_fini(adev);
2036 amdgpu_gfx_rlc_fini(adev);
/* release the RLC clear-state buffer object and its CPU mapping */
2037 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2038 &adev->gfx.rlc.clear_state_gpu_addr,
2039 (void **)&adev->gfx.rlc.cs_ptr);
/* only Carrizo/Stoney allocate an RLC CP table, so only they free it */
2040 if ((adev->asic_type == CHIP_CARRIZO) ||
2041 (adev->asic_type == CHIP_STONEY)) {
2042 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2043 &adev->gfx.rlc.cp_table_gpu_addr,
2044 (void **)&adev->gfx.rlc.cp_table_ptr);
2046 gfx_v8_0_free_microcode(adev);
2051 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2053 uint32_t *modearray, *mod2array;
2054 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2055 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2058 modearray = adev->gfx.config.tile_mode_array;
2059 mod2array = adev->gfx.config.macrotile_mode_array;
2061 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2062 modearray[reg_offset] = 0;
2064 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2065 mod2array[reg_offset] = 0;
2067 switch (adev->asic_type) {
2069 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2070 PIPE_CONFIG(ADDR_SURF_P2) |
2071 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2072 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2073 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2074 PIPE_CONFIG(ADDR_SURF_P2) |
2075 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2076 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2077 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2078 PIPE_CONFIG(ADDR_SURF_P2) |
2079 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2080 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2081 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2082 PIPE_CONFIG(ADDR_SURF_P2) |
2083 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2084 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2085 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2086 PIPE_CONFIG(ADDR_SURF_P2) |
2087 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2088 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2089 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2090 PIPE_CONFIG(ADDR_SURF_P2) |
2091 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2092 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2093 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2094 PIPE_CONFIG(ADDR_SURF_P2) |
2095 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2096 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2097 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2098 PIPE_CONFIG(ADDR_SURF_P2));
2099 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2100 PIPE_CONFIG(ADDR_SURF_P2) |
2101 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2102 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2103 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2104 PIPE_CONFIG(ADDR_SURF_P2) |
2105 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2106 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2107 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2108 PIPE_CONFIG(ADDR_SURF_P2) |
2109 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2110 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2111 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2112 PIPE_CONFIG(ADDR_SURF_P2) |
2113 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2114 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2115 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2116 PIPE_CONFIG(ADDR_SURF_P2) |
2117 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2118 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2119 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2120 PIPE_CONFIG(ADDR_SURF_P2) |
2121 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2122 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2123 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2124 PIPE_CONFIG(ADDR_SURF_P2) |
2125 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2126 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2127 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2128 PIPE_CONFIG(ADDR_SURF_P2) |
2129 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2130 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2131 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2132 PIPE_CONFIG(ADDR_SURF_P2) |
2133 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2134 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2135 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2136 PIPE_CONFIG(ADDR_SURF_P2) |
2137 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2138 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2139 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2140 PIPE_CONFIG(ADDR_SURF_P2) |
2141 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2142 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2143 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2144 PIPE_CONFIG(ADDR_SURF_P2) |
2145 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2146 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2147 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2148 PIPE_CONFIG(ADDR_SURF_P2) |
2149 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2150 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2151 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2152 PIPE_CONFIG(ADDR_SURF_P2) |
2153 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2154 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2155 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2156 PIPE_CONFIG(ADDR_SURF_P2) |
2157 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2158 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2159 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2160 PIPE_CONFIG(ADDR_SURF_P2) |
2161 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2162 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2163 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2164 PIPE_CONFIG(ADDR_SURF_P2) |
2165 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2166 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2167 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2168 PIPE_CONFIG(ADDR_SURF_P2) |
2169 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2170 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2172 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2173 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2174 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2175 NUM_BANKS(ADDR_SURF_8_BANK));
2176 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2177 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2178 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2179 NUM_BANKS(ADDR_SURF_8_BANK));
2180 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2181 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2182 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2183 NUM_BANKS(ADDR_SURF_8_BANK));
2184 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2185 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2186 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2187 NUM_BANKS(ADDR_SURF_8_BANK));
2188 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2189 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2190 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2191 NUM_BANKS(ADDR_SURF_8_BANK));
2192 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2193 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2194 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2195 NUM_BANKS(ADDR_SURF_8_BANK));
2196 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2197 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2198 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2199 NUM_BANKS(ADDR_SURF_8_BANK));
2200 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2201 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2202 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2203 NUM_BANKS(ADDR_SURF_16_BANK));
2204 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2205 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2206 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2207 NUM_BANKS(ADDR_SURF_16_BANK));
2208 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2209 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2210 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2211 NUM_BANKS(ADDR_SURF_16_BANK));
2212 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2213 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2214 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2215 NUM_BANKS(ADDR_SURF_16_BANK));
2216 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2217 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2218 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2219 NUM_BANKS(ADDR_SURF_16_BANK));
2220 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2221 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2222 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2223 NUM_BANKS(ADDR_SURF_16_BANK));
2224 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2225 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2226 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2227 NUM_BANKS(ADDR_SURF_8_BANK));
2229 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2230 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2232 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2234 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2235 if (reg_offset != 7)
2236 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2241 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2242 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2243 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2244 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2245 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2246 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2247 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2248 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2249 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2250 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2251 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2252 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2253 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2254 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2255 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2256 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2257 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2258 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2259 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2260 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2261 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2262 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2263 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2264 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2265 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2266 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2267 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2268 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2269 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2270 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2271 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2272 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2273 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2274 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2275 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2276 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2277 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2278 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2279 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2281 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2282 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2283 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2284 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2285 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2286 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2287 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2288 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2289 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2291 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2292 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2293 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2295 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2296 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2297 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2299 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2300 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2301 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2303 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2304 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2305 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2307 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2308 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2309 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2310 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2311 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2312 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2313 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2314 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2315 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2316 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2318 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2319 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2320 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2321 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2322 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2323 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2324 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2326 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2327 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2328 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2330 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2331 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2332 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2333 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2334 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2335 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2336 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2338 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2339 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2340 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2342 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2343 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2344 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2346 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2347 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2348 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2350 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2351 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2352 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2354 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2355 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2356 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2357 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2358 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2359 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2360 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2361 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2362 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2364 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2366 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2367 NUM_BANKS(ADDR_SURF_8_BANK));
2368 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2370 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2371 NUM_BANKS(ADDR_SURF_8_BANK));
2372 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2373 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2374 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2375 NUM_BANKS(ADDR_SURF_8_BANK));
2376 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2377 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2378 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2379 NUM_BANKS(ADDR_SURF_8_BANK));
2380 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2381 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2382 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2383 NUM_BANKS(ADDR_SURF_8_BANK));
2384 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2386 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2387 NUM_BANKS(ADDR_SURF_8_BANK));
2388 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2389 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2390 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2391 NUM_BANKS(ADDR_SURF_8_BANK));
2392 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2394 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2395 NUM_BANKS(ADDR_SURF_8_BANK));
2396 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2398 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2399 NUM_BANKS(ADDR_SURF_8_BANK));
2400 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2401 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2402 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2403 NUM_BANKS(ADDR_SURF_8_BANK));
2404 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2406 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2407 NUM_BANKS(ADDR_SURF_8_BANK));
2408 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2410 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2411 NUM_BANKS(ADDR_SURF_8_BANK));
2412 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2413 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2414 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2415 NUM_BANKS(ADDR_SURF_8_BANK));
2416 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2418 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2419 NUM_BANKS(ADDR_SURF_4_BANK));
2421 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2422 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2424 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2425 if (reg_offset != 7)
2426 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2430 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2432 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2433 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2434 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2435 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2436 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2437 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2438 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2440 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2441 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2442 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2443 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2444 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2445 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2446 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2447 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2448 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2449 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2450 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2451 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2452 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2453 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2454 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2455 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2456 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2457 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2458 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2459 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2460 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2461 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2462 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2463 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2464 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2465 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2466 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2467 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2468 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2469 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2470 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2471 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2472 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2473 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2474 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2476 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2477 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2478 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2480 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2481 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2482 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2484 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2485 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2486 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2488 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2489 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2490 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2493 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2494 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2496 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2497 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2498 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2499 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2500 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2501 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2502 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2503 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2504 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2505 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2506 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2507 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2508 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2509 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2510 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2511 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2512 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2513 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2515 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2516 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2517 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2519 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2520 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2521 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2522 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2523 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2524 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2525 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2527 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2528 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2529 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2531 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2532 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2533 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2535 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2536 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2537 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2538 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2539 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2542 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2543 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2544 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2545 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2547 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2548 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2549 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2550 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2551 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2553 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2555 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2556 NUM_BANKS(ADDR_SURF_16_BANK));
2557 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2559 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2560 NUM_BANKS(ADDR_SURF_16_BANK));
2561 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2562 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2563 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2564 NUM_BANKS(ADDR_SURF_16_BANK));
2565 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2567 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2568 NUM_BANKS(ADDR_SURF_16_BANK));
2569 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2571 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2572 NUM_BANKS(ADDR_SURF_16_BANK));
2573 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2575 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2576 NUM_BANKS(ADDR_SURF_16_BANK));
2577 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2579 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2580 NUM_BANKS(ADDR_SURF_16_BANK));
2581 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2583 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2584 NUM_BANKS(ADDR_SURF_16_BANK));
2585 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2586 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2587 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2588 NUM_BANKS(ADDR_SURF_16_BANK));
2589 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2591 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2592 NUM_BANKS(ADDR_SURF_16_BANK));
2593 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2594 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2595 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2596 NUM_BANKS(ADDR_SURF_16_BANK));
2597 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2599 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2600 NUM_BANKS(ADDR_SURF_8_BANK));
2601 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2603 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2604 NUM_BANKS(ADDR_SURF_4_BANK));
2605 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2606 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2607 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2608 NUM_BANKS(ADDR_SURF_4_BANK));
2610 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2611 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2613 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2614 if (reg_offset != 7)
2615 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2618 case CHIP_POLARIS11:
2619 case CHIP_POLARIS12:
2620 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2621 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2622 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2623 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2624 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2626 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2627 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2628 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2630 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2631 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2632 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2633 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2634 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2635 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2636 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2637 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2638 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2639 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2640 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2641 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2642 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2643 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2644 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2645 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2646 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2647 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2648 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2649 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2650 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2651 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2652 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2653 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2654 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2655 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2656 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2657 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2658 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2659 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2661 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2662 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2663 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2665 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2666 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2667 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2669 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2670 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2671 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2673 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2674 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2676 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2677 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2678 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2679 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2680 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2681 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2682 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2683 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2684 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2685 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2686 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2687 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2688 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2689 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2690 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2691 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2693 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2694 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2695 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2697 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2698 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2699 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2701 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2702 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2703 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2705 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2706 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2707 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2709 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2710 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2711 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2713 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2714 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2715 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2717 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2718 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2719 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2721 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2722 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2723 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2725 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2726 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2727 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2730 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2731 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2733 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2734 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2735 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2736 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2737 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2738 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2739 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2740 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2741 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2743 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2745 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2746 NUM_BANKS(ADDR_SURF_16_BANK));
2748 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2749 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2750 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2751 NUM_BANKS(ADDR_SURF_16_BANK));
2753 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2754 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2755 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2756 NUM_BANKS(ADDR_SURF_16_BANK));
2758 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2759 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2760 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2761 NUM_BANKS(ADDR_SURF_16_BANK));
2763 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2765 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2766 NUM_BANKS(ADDR_SURF_16_BANK));
2768 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2770 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2771 NUM_BANKS(ADDR_SURF_16_BANK));
2773 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2774 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2775 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2776 NUM_BANKS(ADDR_SURF_16_BANK));
2778 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2779 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2780 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2781 NUM_BANKS(ADDR_SURF_16_BANK));
2783 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2784 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2785 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2786 NUM_BANKS(ADDR_SURF_16_BANK));
2788 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2789 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2790 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2791 NUM_BANKS(ADDR_SURF_16_BANK));
2793 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2794 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2795 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2796 NUM_BANKS(ADDR_SURF_16_BANK));
2798 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2800 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2801 NUM_BANKS(ADDR_SURF_16_BANK));
2803 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2804 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2805 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2806 NUM_BANKS(ADDR_SURF_8_BANK));
2808 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2810 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2811 NUM_BANKS(ADDR_SURF_4_BANK));
2813 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2814 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2816 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2817 if (reg_offset != 7)
2818 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2821 case CHIP_POLARIS10:
2822 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2823 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2824 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2825 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2826 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2827 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2828 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2829 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2830 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2831 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2832 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2833 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2834 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2835 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2836 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2837 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2838 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2839 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2840 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2841 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2842 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2843 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2844 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2845 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2846 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2847 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2848 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2849 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2850 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2851 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2852 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2853 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2854 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2855 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2856 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2857 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2858 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2859 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2860 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2861 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2862 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2863 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2864 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2865 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2866 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2867 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2868 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2869 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2870 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2871 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2872 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2873 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2874 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2875 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2876 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2877 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2878 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2879 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2880 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2881 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2882 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2883 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2884 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2885 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2886 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2887 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2888 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2889 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2890 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2891 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2892 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2893 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2894 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2895 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2896 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2897 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2898 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2899 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2900 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2901 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2902 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2903 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2904 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2905 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2907 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2908 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2909 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2911 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2912 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2913 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2914 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2915 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2916 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2917 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2919 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2920 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2921 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2923 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2924 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2925 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2926 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2927 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2928 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2929 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2930 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2931 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2932 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2934 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2935 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2938 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2939 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2940 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2941 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2942 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2943 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2945 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2946 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2947 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2948 NUM_BANKS(ADDR_SURF_16_BANK));
2950 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2951 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2952 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2953 NUM_BANKS(ADDR_SURF_16_BANK));
2955 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2957 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2958 NUM_BANKS(ADDR_SURF_16_BANK));
2960 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2961 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2962 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963 NUM_BANKS(ADDR_SURF_16_BANK));
2965 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2966 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2967 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2968 NUM_BANKS(ADDR_SURF_16_BANK));
2970 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2971 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2972 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2973 NUM_BANKS(ADDR_SURF_16_BANK));
2975 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2976 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2977 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2978 NUM_BANKS(ADDR_SURF_16_BANK));
2980 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2982 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983 NUM_BANKS(ADDR_SURF_16_BANK));
2985 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2986 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2987 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2988 NUM_BANKS(ADDR_SURF_16_BANK));
2990 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2991 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2992 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2993 NUM_BANKS(ADDR_SURF_16_BANK));
2995 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2998 NUM_BANKS(ADDR_SURF_16_BANK));
3000 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3001 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3002 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3003 NUM_BANKS(ADDR_SURF_8_BANK));
3005 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3006 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3007 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3008 NUM_BANKS(ADDR_SURF_4_BANK));
3010 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3011 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3012 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3013 NUM_BANKS(ADDR_SURF_4_BANK));
3015 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3016 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3018 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3019 if (reg_offset != 7)
3020 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3024 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3025 PIPE_CONFIG(ADDR_SURF_P2) |
3026 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3027 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3028 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3029 PIPE_CONFIG(ADDR_SURF_P2) |
3030 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3031 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3032 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3033 PIPE_CONFIG(ADDR_SURF_P2) |
3034 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3035 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3036 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3037 PIPE_CONFIG(ADDR_SURF_P2) |
3038 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3039 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3040 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3041 PIPE_CONFIG(ADDR_SURF_P2) |
3042 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3043 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3044 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3045 PIPE_CONFIG(ADDR_SURF_P2) |
3046 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3047 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3048 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3049 PIPE_CONFIG(ADDR_SURF_P2) |
3050 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3051 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3052 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3053 PIPE_CONFIG(ADDR_SURF_P2));
3054 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3055 PIPE_CONFIG(ADDR_SURF_P2) |
3056 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3057 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3058 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3059 PIPE_CONFIG(ADDR_SURF_P2) |
3060 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3061 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3062 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3063 PIPE_CONFIG(ADDR_SURF_P2) |
3064 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3065 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3066 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3067 PIPE_CONFIG(ADDR_SURF_P2) |
3068 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3069 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3070 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3071 PIPE_CONFIG(ADDR_SURF_P2) |
3072 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3073 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3074 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3075 PIPE_CONFIG(ADDR_SURF_P2) |
3076 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3077 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3078 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3079 PIPE_CONFIG(ADDR_SURF_P2) |
3080 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3081 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3082 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3083 PIPE_CONFIG(ADDR_SURF_P2) |
3084 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3085 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3086 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3087 PIPE_CONFIG(ADDR_SURF_P2) |
3088 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3089 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3090 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3091 PIPE_CONFIG(ADDR_SURF_P2) |
3092 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3093 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3094 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3095 PIPE_CONFIG(ADDR_SURF_P2) |
3096 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3097 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3098 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3099 PIPE_CONFIG(ADDR_SURF_P2) |
3100 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3101 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3102 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3103 PIPE_CONFIG(ADDR_SURF_P2) |
3104 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3105 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3106 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3107 PIPE_CONFIG(ADDR_SURF_P2) |
3108 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3109 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3110 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3111 PIPE_CONFIG(ADDR_SURF_P2) |
3112 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3113 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3114 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3115 PIPE_CONFIG(ADDR_SURF_P2) |
3116 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3117 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3118 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3119 PIPE_CONFIG(ADDR_SURF_P2) |
3120 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3121 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3122 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3123 PIPE_CONFIG(ADDR_SURF_P2) |
3124 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3125 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3127 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3128 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3129 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3130 NUM_BANKS(ADDR_SURF_8_BANK));
3131 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3132 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3133 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3134 NUM_BANKS(ADDR_SURF_8_BANK));
3135 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3136 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3137 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3138 NUM_BANKS(ADDR_SURF_8_BANK));
3139 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3140 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3141 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3142 NUM_BANKS(ADDR_SURF_8_BANK));
3143 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3144 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3145 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3146 NUM_BANKS(ADDR_SURF_8_BANK));
3147 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150 NUM_BANKS(ADDR_SURF_8_BANK));
3151 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3152 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3153 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3154 NUM_BANKS(ADDR_SURF_8_BANK));
3155 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3156 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3157 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3158 NUM_BANKS(ADDR_SURF_16_BANK));
3159 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3160 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3161 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3162 NUM_BANKS(ADDR_SURF_16_BANK));
3163 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3164 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3165 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3166 NUM_BANKS(ADDR_SURF_16_BANK));
3167 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3168 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3169 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3170 NUM_BANKS(ADDR_SURF_16_BANK));
3171 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3172 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3173 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3174 NUM_BANKS(ADDR_SURF_16_BANK));
3175 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3176 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3177 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3178 NUM_BANKS(ADDR_SURF_16_BANK));
3179 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3180 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3181 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3182 NUM_BANKS(ADDR_SURF_8_BANK));
3184 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3185 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3187 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3189 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3190 if (reg_offset != 7)
3191 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3196 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3201 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3202 PIPE_CONFIG(ADDR_SURF_P2) |
3203 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3204 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3205 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3206 PIPE_CONFIG(ADDR_SURF_P2) |
3207 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3208 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3209 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3210 PIPE_CONFIG(ADDR_SURF_P2) |
3211 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3212 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3213 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3214 PIPE_CONFIG(ADDR_SURF_P2) |
3215 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3216 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3217 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3218 PIPE_CONFIG(ADDR_SURF_P2) |
3219 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3220 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3221 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3222 PIPE_CONFIG(ADDR_SURF_P2) |
3223 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3224 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3225 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3226 PIPE_CONFIG(ADDR_SURF_P2) |
3227 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3228 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3229 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3230 PIPE_CONFIG(ADDR_SURF_P2));
3231 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3232 PIPE_CONFIG(ADDR_SURF_P2) |
3233 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3234 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3236 PIPE_CONFIG(ADDR_SURF_P2) |
3237 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3238 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3239 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3240 PIPE_CONFIG(ADDR_SURF_P2) |
3241 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3242 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3243 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3244 PIPE_CONFIG(ADDR_SURF_P2) |
3245 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3246 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3248 PIPE_CONFIG(ADDR_SURF_P2) |
3249 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3250 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3251 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3252 PIPE_CONFIG(ADDR_SURF_P2) |
3253 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3254 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3255 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3256 PIPE_CONFIG(ADDR_SURF_P2) |
3257 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3259 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3260 PIPE_CONFIG(ADDR_SURF_P2) |
3261 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3263 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3264 PIPE_CONFIG(ADDR_SURF_P2) |
3265 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3266 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3267 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3268 PIPE_CONFIG(ADDR_SURF_P2) |
3269 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3270 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3271 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3272 PIPE_CONFIG(ADDR_SURF_P2) |
3273 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3274 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3275 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3276 PIPE_CONFIG(ADDR_SURF_P2) |
3277 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3278 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3279 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3280 PIPE_CONFIG(ADDR_SURF_P2) |
3281 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3282 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3283 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3284 PIPE_CONFIG(ADDR_SURF_P2) |
3285 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3286 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3287 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3288 PIPE_CONFIG(ADDR_SURF_P2) |
3289 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3291 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3292 PIPE_CONFIG(ADDR_SURF_P2) |
3293 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3295 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3296 PIPE_CONFIG(ADDR_SURF_P2) |
3297 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3299 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3300 PIPE_CONFIG(ADDR_SURF_P2) |
3301 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3304 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3305 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3306 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 NUM_BANKS(ADDR_SURF_8_BANK));
3308 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3309 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3310 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3311 NUM_BANKS(ADDR_SURF_8_BANK));
3312 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3313 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3314 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3315 NUM_BANKS(ADDR_SURF_8_BANK));
3316 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3318 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3319 NUM_BANKS(ADDR_SURF_8_BANK));
3320 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3321 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3322 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3323 NUM_BANKS(ADDR_SURF_8_BANK));
3324 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3326 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3327 NUM_BANKS(ADDR_SURF_8_BANK));
3328 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3329 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3330 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3331 NUM_BANKS(ADDR_SURF_8_BANK));
3332 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3333 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3334 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3335 NUM_BANKS(ADDR_SURF_16_BANK));
3336 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3337 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3338 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3339 NUM_BANKS(ADDR_SURF_16_BANK));
3340 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3341 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3342 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3343 NUM_BANKS(ADDR_SURF_16_BANK));
3344 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3345 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3346 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3347 NUM_BANKS(ADDR_SURF_16_BANK));
3348 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3349 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3350 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3351 NUM_BANKS(ADDR_SURF_16_BANK));
3352 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3353 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3354 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355 NUM_BANKS(ADDR_SURF_16_BANK));
3356 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3357 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3358 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3359 NUM_BANKS(ADDR_SURF_8_BANK));
3361 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3362 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3364 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3366 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3367 if (reg_offset != 7)
3368 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
/*
 * Program GRBM_GFX_INDEX so that subsequent GRBM register accesses target
 * one specific shader engine (SE) / shader array (SH) / instance, or
 * broadcast to all of them when 0xffffffff is passed for that argument.
 * NOTE(review): callers appear to serialize this via adev->grbm_idx_mutex
 * (see gfx_v8_0_setup_rb) — confirm all call sites hold it.
 */
3374 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3375 u32 se_num, u32 sh_num, u32 instance)
3379 if (instance == 0xffffffff)
3380 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3382 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3384 if (se_num == 0xffffffff)
3385 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3387 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3389 if (sh_num == 0xffffffff)
3390 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3392 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3394 WREG32(mmGRBM_GFX_INDEX, data); /* single write commits all three fields */
/*
 * Thin wrapper: select a CP micro-engine/pipe/queue (and VMID) via the
 * SRBM, used by callbacks that expect the gfx-IP "select_me_pipe_q" shape.
 */
3397 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3398 u32 me, u32 pipe, u32 q, u32 vm)
3400 vi_srbm_select(adev, me, pipe, q, vm);
/*
 * Return a bitmap of the render backends (RBs) that are active for the
 * currently selected SE/SH: OR the hard-fused and user-disable registers,
 * extract the disable field, then invert under a mask sized to
 * max_backends_per_se / max_sh_per_se.
 * NOTE(review): the combined value is decoded with the
 * GC_USER_RB_BACKEND_DISABLE field — presumably CC_RB_BACKEND_DISABLE
 * shares the same BACKEND_DISABLE bit layout; verify against the
 * gmc_8_2 register headers.
 */
3403 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3407 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3408 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3410 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3412 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3413 adev->gfx.config.max_sh_per_se);
3415 return (~data) & mask; /* 1 = RB active */
/*
 * Fill in the golden PA_SC_RASTER_CONFIG / _CONFIG_1 values for the
 * current ASIC.  The values are OR-ed into *rconf / *rconf1, which the
 * caller pre-initializes to 0 (see gfx_v8_0_setup_rb).  Unknown ASICs
 * are reported with DRM_ERROR and the outputs are left untouched.
 */
3419 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3421 switch (adev->asic_type) {
3424 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3425 RB_XSEL2(1) | PKR_MAP(2) |
3426 PKR_XSEL(1) | PKR_YSEL(1) |
3427 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3428 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3432 case CHIP_POLARIS10:
3433 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3434 SE_XSEL(1) | SE_YSEL(1);
3435 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3440 *rconf |= RB_MAP_PKR0(2);
3443 case CHIP_POLARIS11:
3444 case CHIP_POLARIS12:
3445 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3446 SE_XSEL(1) | SE_YSEL(1);
3454 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
/*
 * Rewrite the per-SE raster configuration when some render backends have
 * been harvested (fused off).  For each shader engine, the SE/PKR/RB map
 * fields of raster_config are patched so that work is only routed to RBs
 * present in rb_mask, then the result is written with the SE selected via
 * GRBM_GFX_INDEX.  raster_config_1 (SE pair map) is patched once when an
 * entire SE pair is missing.  Broadcast selection is restored on exit.
 */
3460 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3461 u32 raster_config, u32 raster_config_1,
3462 unsigned rb_mask, unsigned num_rb)
3464 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3465 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3466 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3467 unsigned rb_per_se = num_rb / num_se;
3468 unsigned se_mask[4];
3471 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; /* RBs belonging to SE0..SE3 */
3472 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3473 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3474 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3476 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3477 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3478 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3480 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3481 (!se_mask[2] && !se_mask[3]))) {
3482 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3484 if (!se_mask[0] && !se_mask[1]) {
3486 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3489 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3493 for (se = 0; se < num_se; se++) {
3494 unsigned raster_config_se = raster_config;
3495 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3496 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3497 int idx = (se / 2) * 2; /* index of this SE's pair partner base */
3499 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3500 raster_config_se &= ~SE_MAP_MASK;
3502 if (!se_mask[idx]) {
3503 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3505 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3509 pkr0_mask &= rb_mask;
3510 pkr1_mask &= rb_mask;
3511 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3512 raster_config_se &= ~PKR_MAP_MASK;
3515 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3517 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3521 if (rb_per_se >= 2) {
3522 unsigned rb0_mask = 1 << (se * rb_per_se);
3523 unsigned rb1_mask = rb0_mask << 1;
3525 rb0_mask &= rb_mask;
3526 rb1_mask &= rb_mask;
3527 if (!rb0_mask || !rb1_mask) {
3528 raster_config_se &= ~RB_MAP_PKR0_MASK;
3532 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3535 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3539 if (rb_per_se > 2) {
3540 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3541 rb1_mask = rb0_mask << 1;
3542 rb0_mask &= rb_mask;
3543 rb1_mask &= rb_mask;
3544 if (!rb0_mask || !rb1_mask) {
3545 raster_config_se &= ~RB_MAP_PKR1_MASK;
3549 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3552 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3558 /* GRBM_GFX_INDEX has a different offset on VI */
3559 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3560 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3561 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3564 /* GRBM_GFX_INDEX has a different offset on VI */
3565 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
/*
 * Discover which render backends are active, program the raster
 * configuration (golden values, or the harvested variant when RBs are
 * fused off), and cache the per-SE/SH RB registers for userspace queries.
 * Holds grbm_idx_mutex around all SE/SH-indexed register traffic.
 */
3568 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3572 u32 raster_config = 0, raster_config_1 = 0;
3574 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3575 adev->gfx.config.max_sh_per_se;
3576 unsigned num_rb_pipes;
3578 mutex_lock(&adev->grbm_idx_mutex);
3579 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3580 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3581 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3582 data = gfx_v8_0_get_rb_active_bitmap(adev);
3583 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3584 rb_bitmap_width_per_sh);
3587 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3589 adev->gfx.config.backend_enable_mask = active_rbs;
3590 adev->gfx.config.num_rbs = hweight32(active_rbs);
3592 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3593 adev->gfx.config.max_shader_engines, 16);
3595 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3597 if (!adev->gfx.config.backend_enable_mask ||
3598 adev->gfx.config.num_rbs >= num_rb_pipes) {
3599 WREG32(mmPA_SC_RASTER_CONFIG, raster_config); /* no harvesting: golden values */
3600 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3602 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3603 adev->gfx.config.backend_enable_mask,
3607 /* cache the values for userspace */
3608 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3609 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3610 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3611 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3612 RREG32(mmCC_RB_BACKEND_DISABLE);
3613 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3614 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3615 adev->gfx.config.rb_config[i][j].raster_config =
3616 RREG32(mmPA_SC_RASTER_CONFIG);
3617 adev->gfx.config.rb_config[i][j].raster_config_1 =
3618 RREG32(mmPA_SC_RASTER_CONFIG_1);
3621 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3622 mutex_unlock(&adev->grbm_idx_mutex);
3626  * gfx_v8_0_init_compute_vmid - initialize the compute VMID apertures
3628  * @adev: amdgpu_device pointer
3630  * Initialize compute vmid sh_mem registers
3633 #define DEFAULT_SH_MEM_BASES (0x6000)
3634 #define FIRST_COMPUTE_VMID (8)
3635 #define LAST_COMPUTE_VMID (16)
3636 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3639 uint32_t sh_mem_config;
3640 uint32_t sh_mem_bases;
3643 * Configure apertures:
3644 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3645 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3646 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3648 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3650 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3651 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3652 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3653 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3654 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3655 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3657 mutex_lock(&adev->srbm_mutex);
3658 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3659 vi_srbm_select(adev, 0, 0, 0, i);
3660 /* CP and shaders */
3661 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3662 WREG32(mmSH_MEM_APE1_BASE, 1); /* APE1 disabled (base > limit) */
3663 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3664 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3666 vi_srbm_select(adev, 0, 0, 0, 0);
3667 mutex_unlock(&adev->srbm_mutex);
3669 /* Initialize all compute VMIDs to have no GDS, GWS, or OA
3670 access. These should be enabled by FW for target VMIDs. */
3671 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3672 WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3673 WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3674 WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3675 WREG32(amdgpu_gds_reg_offset[i].oa, 0);
/*
 * Per-ASIC gfx configuration tweaks; currently only selects whether the
 * double off-chip LDS buffer feature is enabled.
 */
3679 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3681 switch (adev->asic_type) {
3683 adev->gfx.config.double_offchip_lds_buf = 1;
3687 adev->gfx.config.double_offchip_lds_buf = 0;
/*
 * Program the "golden" gfx constants at init time: address config,
 * tiling tables, RB setup, per-VMID SH_MEM apertures, PA_SC FIFO sizes
 * and SPI arbitration priorities.  Takes srbm_mutex for the per-VMID
 * writes and grbm_idx_mutex for the broadcast writes.
 */
3692 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3694 u32 tmp, sh_static_mem_cfg;
3697 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3698 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3699 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3700 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3702 gfx_v8_0_tiling_mode_table_init(adev);
3703 gfx_v8_0_setup_rb(adev);
3704 gfx_v8_0_get_cu_info(adev);
3705 gfx_v8_0_config_init(adev);
3707 /* XXX SH_MEM regs */
3708 /* where to put LDS, scratch, GPUVM in FSA64 space */
3709 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3711 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3713 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3715 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3717 mutex_lock(&adev->srbm_mutex);
3718 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3719 vi_srbm_select(adev, 0, 0, 0, i);
3720 /* CP and shaders */
3722 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3723 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3724 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3725 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3726 WREG32(mmSH_MEM_CONFIG, tmp);
3727 WREG32(mmSH_MEM_BASES, 0);
3729 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3730 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3731 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3732 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3733 WREG32(mmSH_MEM_CONFIG, tmp);
3734 tmp = adev->gmc.shared_aperture_start >> 48;
3735 WREG32(mmSH_MEM_BASES, tmp);
3738 WREG32(mmSH_MEM_APE1_BASE, 1); /* APE1 disabled (base > limit) */
3739 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3741 vi_srbm_select(adev, 0, 0, 0, 0);
3742 mutex_unlock(&adev->srbm_mutex);
3744 gfx_v8_0_init_compute_vmid(adev);
3746 mutex_lock(&adev->grbm_idx_mutex);
3748 * making sure that the following register writes will be broadcasted
3749 * to all the shaders
3751 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3753 WREG32(mmPA_SC_FIFO_SIZE,
3754 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3755 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3756 (adev->gfx.config.sc_prim_fifo_size_backend <<
3757 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3758 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3759 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3760 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3761 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3763 tmp = RREG32(mmSPI_ARB_PRIORITY);
3764 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3765 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3766 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3767 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3768 WREG32(mmSPI_ARB_PRIORITY, tmp);
3770 mutex_unlock(&adev->grbm_idx_mutex);
/*
 * Poll until the RLC serdes masters report idle: first the per-SE/SH CU
 * masters (bailing out with a DRM_INFO on timeout), then the non-CU
 * masters (SE/GC/TC0/TC1) with broadcast selection restored.
 */
3774 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3779 mutex_lock(&adev->grbm_idx_mutex);
3780 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3781 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3782 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3783 for (k = 0; k < adev->usec_timeout; k++) {
3784 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3788 if (k == adev->usec_timeout) {
3789 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3790 0xffffffff, 0xffffffff);
3791 mutex_unlock(&adev->grbm_idx_mutex);
3792 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3798 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3799 mutex_unlock(&adev->grbm_idx_mutex);
3801 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3802 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3803 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3804 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3805 for (k = 0; k < adev->usec_timeout; k++) {
3806 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/*
 * Enable/disable the GUI-idle related interrupt sources on the gfx ring 0
 * interrupt control register (context busy/empty, CMP busy, gfx idle).
 */
3812 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3815 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3817 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3818 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3819 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3820 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3822 WREG32(mmCP_INT_CNTL_RING0, tmp);
/*
 * Point the RLC at the clear-state indirect buffer (CSIB): 64-bit GPU
 * address split hi/lo (low bits masked to dword alignment) plus length.
 */
3825 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3828 WREG32(mmRLC_CSIB_ADDR_HI,
3829 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3830 WREG32(mmRLC_CSIB_ADDR_LO,
3831 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3832 WREG32(mmRLC_CSIB_LENGTH,
3833 adev->gfx.rlc.clear_state_size);
/*
 * Walk the RLC indirect register-list-format blob: record the start
 * offset of each entry run, deduplicate register indices into
 * unique_indices[], and rewrite each entry in-place to reference its
 * deduplicated index.  BUG_ON guards the fixed-size output arrays.
 * NOTE(review): the 0xFFFFFFFF sentinel appears to delimit entries —
 * confirm against the RLC firmware list format.
 */
3836 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3839 int *unique_indices,
3842 int *ind_start_offsets,
3847 bool new_entry = true;
3849 for (; ind_offset < list_size; ind_offset++) {
3853 ind_start_offsets[*offset_count] = ind_offset;
3854 *offset_count = *offset_count + 1;
3855 BUG_ON(*offset_count >= max_offset);
3858 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3865 /* look for the matching indice */
3867 indices < *indices_count;
3869 if (unique_indices[indices] ==
3870 register_list_format[ind_offset])
3874 if (indices >= *indices_count) {
3875 unique_indices[*indices_count] =
3876 register_list_format[ind_offset];
3877 indices = *indices_count;
3878 *indices_count = *indices_count + 1;
3879 BUG_ON(*indices_count >= max_indices);
3882 register_list_format[ind_offset] = indices;
/*
 * Upload the RLC save/restore lists: copy the firmware-provided format
 * list (parsed/deduplicated via gfx_v8_0_parse_ind_reg_list), feed the
 * restore list into SRM ARAM, the format list and starting offsets into
 * GPM scratch, and program the unique index control/data register pairs.
 * Works on a kmemdup'd copy so the firmware blob is not modified.
 */
3886 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3889 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3890 int indices_count = 0;
3891 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3892 int offset_count = 0;
3895 unsigned int *register_list_format =
3896 kmemdup(adev->gfx.rlc.register_list_format,
3897 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3898 if (!register_list_format)
3901 gfx_v8_0_parse_ind_reg_list(register_list_format,
3902 RLC_FormatDirectRegListLength,
3903 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3906 ARRAY_SIZE(unique_indices),
3907 indirect_start_offsets,
3909 ARRAY_SIZE(indirect_start_offsets));
3911 /* save and restore list */
3912 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3914 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3915 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3916 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3919 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3920 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3921 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3923 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3924 list_size = list_size >> 1; /* size in reg/value pairs */
3925 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3926 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3928 /* starting offsets starts */
3929 WREG32(mmRLC_GPM_SCRATCH_ADDR,
3930 adev->gfx.rlc.starting_offsets_start);
3931 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3932 WREG32(mmRLC_GPM_SCRATCH_DATA,
3933 indirect_start_offsets[i]);
3935 /* unique indices */
3936 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3937 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3938 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3939 if (unique_indices[i] != 0) {
3940 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3941 WREG32(data + i, unique_indices[i] >> 20);
3944 kfree(register_list_format);
/* Turn on the RLC save/restore machine (SRM). */
3949 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3951 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
/*
 * Program the RLC power-gating timing parameters: WPTR poll idle count,
 * the four RLC_PG_DELAY sub-delays, the serdes command delay and the
 * GRBM register-save gfx-idle threshold.
 */
3954 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3958 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3960 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3961 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3962 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3963 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3964 WREG32(mmRLC_PG_DELAY, data);
3966 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3967 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
/* Enable/disable SMU clock slow-down while powering up (CZ/ST). */
3971 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3974 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
/* Enable/disable SMU clock slow-down while powering down (CZ/ST). */
3977 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3980 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
/* Enable CP power gating; note the field is a *disable* bit, hence inverted. */
3983 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3985 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
/*
 * Power-gating init dispatch: CZ/Stoney additionally program the RLC
 * jump table and always-on CU mask; Polaris11/12/VegaM get only the
 * CSB + save/restore + PG timing setup.  Other ASICs are untouched.
 */
3988 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3990 if ((adev->asic_type == CHIP_CARRIZO) ||
3991 (adev->asic_type == CHIP_STONEY)) {
3992 gfx_v8_0_init_csb(adev);
3993 gfx_v8_0_init_save_restore_list(adev);
3994 gfx_v8_0_enable_save_restore_machine(adev);
3995 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3996 gfx_v8_0_init_power_gating(adev);
3997 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3998 } else if ((adev->asic_type == CHIP_POLARIS11) ||
3999 (adev->asic_type == CHIP_POLARIS12) ||
4000 (adev->asic_type == CHIP_VEGAM)) {
4001 gfx_v8_0_init_csb(adev);
4002 gfx_v8_0_init_save_restore_list(adev);
4003 gfx_v8_0_enable_save_restore_machine(adev);
4004 gfx_v8_0_init_power_gating(adev);
/* Halt the RLC F32 core, mask GUI-idle interrupts, wait for serdes idle. */
4009 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4011 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4013 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4014 gfx_v8_0_wait_for_rlc_serdes(adev);
/* Pulse the GRBM soft reset bit for the RLC (assert, then deassert). */
4017 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4019 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1)
4022 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
/*
 * Start the RLC F32 core; on dGPUs also re-enable the GUI-idle interrupt
 * (APUs defer this until the CP is initialized — see comment below).
 */
4026 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4028 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4030 /* carrizo do enable cp interrupt after cp inited */
4031 if (!(adev->flags & AMD_IS_APU))
4032 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/*
 * Resume the RLC: under SR-IOV only the CSB pointer needs reprogramming;
 * bare-metal does a full stop/reset, PG init, then start.
 */
4037 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4039 if (amdgpu_sriov_vf(adev)) {
4040 gfx_v8_0_init_csb(adev);
4044 adev->gfx.rlc.funcs->stop(adev);
4045 adev->gfx.rlc.funcs->reset(adev);
4046 gfx_v8_0_init_pg(adev);
4047 adev->gfx.rlc.funcs->start(adev);
/*
 * Halt or un-halt the three gfx CP micro-engines (ME/PFP/CE).  When
 * halting, all gfx rings are also marked not-ready for the scheduler.
 */
4052 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4055 u32 tmp = RREG32(mmCP_ME_CNTL);
4058 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4059 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4060 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4062 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4063 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4064 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4065 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4066 adev->gfx.gfx_ring[i].sched.ready = false;
4068 WREG32(mmCP_ME_CNTL, tmp);
/*
 * Compute the dword count of the clear-state buffer emitted by
 * gfx_v8_0_cp_gfx_start: fixed preamble/context-control packets plus
 * 2 + reg_count dwords for each SECT_CONTEXT extent in vi_cs_data.
 * Must stay in sync with the packets written in cp_gfx_start.
 */
4072 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4075 const struct cs_section_def *sect = NULL;
4076 const struct cs_extent_def *ext = NULL;
4078 /* begin clear state */
4080 /* context control state */
4083 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4084 for (ext = sect->section; ext->extent != NULL; ++ext) {
4085 if (sect->id == SECT_CONTEXT)
4086 count += 2 + ext->reg_count;
4091 /* pa_sc_raster_config/pa_sc_raster_config1 */
4093 /* end clear state */
/*
 * Prime gfx ring 0 after CP enable: emit the clear-state preamble,
 * context control, the vi_cs_data SECT_CONTEXT register extents, the
 * cached raster config pair, CLEAR_STATE, and the CE partition bases.
 * Ring allocation size must match gfx_v8_0_get_csb_size() + 4.
 */
4101 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4103 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4104 const struct cs_section_def *sect = NULL;
4105 const struct cs_extent_def *ext = NULL;
4109 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4110 WREG32(mmCP_ENDIAN_SWAP, 0);
4111 WREG32(mmCP_DEVICE_ID, 1);
4113 gfx_v8_0_cp_gfx_enable(adev, true);
4115 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4117 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4121 /* clear state buffer */
4122 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4123 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4125 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4126 amdgpu_ring_write(ring, 0x80000000);
4127 amdgpu_ring_write(ring, 0x80000000);
4129 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4130 for (ext = sect->section; ext->extent != NULL; ++ext) {
4131 if (sect->id == SECT_CONTEXT) {
4132 amdgpu_ring_write(ring,
4133 PACKET3(PACKET3_SET_CONTEXT_REG,
4135 amdgpu_ring_write(ring,
4136 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4137 for (i = 0; i < ext->reg_count; i++)
4138 amdgpu_ring_write(ring, ext->extent[i]);
4143 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4144 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4145 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4146 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4148 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4149 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4151 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4152 amdgpu_ring_write(ring, 0);
4154 /* init the CE partitions */
4155 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4156 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4157 amdgpu_ring_write(ring, 0x8000);
4158 amdgpu_ring_write(ring, 0x8000);
4160 amdgpu_ring_commit(ring);
/*
 * Configure the gfx CP doorbell: program offset/enable in
 * CP_RB_DOORBELL_CONTROL for the ring, and on dGPUs also set the
 * doorbell address range registers.  Iceland (Topaz) has no gfx
 * doorbells and is skipped entirely.
 */
4164 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4167 /* no gfx doorbells on iceland */
4168 if (adev->asic_type == CHIP_TOPAZ)
4171 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4173 if (ring->use_doorbell) {
4174 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4175 DOORBELL_OFFSET, ring->doorbell_index);
4176 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4178 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4181 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4184 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4186 if (adev->flags & AMD_IS_APU)
4189 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4190 DOORBELL_RANGE_LOWER,
4191 adev->doorbell_index.gfx_ring0);
4192 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4194 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4195 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
/*
 * Bring up gfx ring 0: program ring buffer size/control, reset read and
 * write pointers, set the writeback addresses for rptr/wptr, program the
 * ring base, configure doorbells, then clear the ring and emit the
 * clear-state start sequence via gfx_v8_0_cp_gfx_start.
 */
4198 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4200 struct amdgpu_ring *ring;
4203 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4205 /* Set the write pointer delay */
4206 WREG32(mmCP_RB_WPTR_DELAY, 0);
4208 /* set the RB to use vmid 0 */
4209 WREG32(mmCP_RB_VMID, 0);
4211 /* Set ring buffer size */
4212 ring = &adev->gfx.gfx_ring[0];
4213 rb_bufsz = order_base_2(ring->ring_size / 8);
4214 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4215 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4216 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4217 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4219 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4221 WREG32(mmCP_RB0_CNTL, tmp);
4223 /* Initialize the ring buffer's read and write pointers */
4224 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4226 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4228 /* set the wb address whether it's enabled or not */
4229 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4230 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4231 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4233 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4234 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4235 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4237 WREG32(mmCP_RB0_CNTL, tmp);
4239 rb_addr = ring->gpu_addr >> 8;
4240 WREG32(mmCP_RB0_BASE, rb_addr);
4241 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4243 gfx_v8_0_set_cpg_door_bell(adev, ring);
4244 /* start the ring */
4245 amdgpu_ring_clear_ring(ring);
4246 gfx_v8_0_cp_gfx_start(adev);
4247 ring->sched.ready = true;
/*
 * Halt or un-halt the compute micro-engines (MEC ME1/ME2); on halt, all
 * compute rings and the KIQ ring are marked not-ready for the scheduler.
 */
4252 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4257 WREG32(mmCP_MEC_CNTL, 0);
4259 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4260 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4261 adev->gfx.compute_ring[i].sched.ready = false;
4262 adev->gfx.kiq.ring.sched.ready = false;
/*
 * Tell the RLC which me/pipe/queue is the KIQ by encoding them into
 * RLC_CP_SCHEDULERS (me at bit 5, pipe at bit 3, queue in low bits).
 * NOTE(review): the register appears to be written twice — presumably
 * an intermediate value is set between the two writes on a hidden line;
 * verify against the upstream source.
 */
4268 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4271 struct amdgpu_device *adev = ring->adev;
4273 /* tell RLC which is KIQ queue */
4274 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4276 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4277 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4279 WREG32(mmRLC_CP_SCHEDULERS, tmp);
/*
 * Use the KIQ to map all kernel compute queues (KCQs): build the queue
 * bitmask from the MEC queue bitmap, emit one SET_RESOURCES packet, then
 * one MAP_QUEUES packet per compute ring with its MQD and wptr addresses.
 */
4282 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4284 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4285 uint64_t queue_mask = 0;
4288 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4289 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4292 /* This situation may be hit in the future if a new HW
4293 * generation exposes more than 64 queues. If so, the
4294 * definition of queue_mask needs updating */
4295 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4296 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4300 queue_mask |= (1ull << i);
4303 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4305 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4309 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4310 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4311 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4312 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4313 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4314 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4315 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4316 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4317 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4318 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4319 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4320 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4323 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4324 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4325 amdgpu_ring_write(kiq_ring,
4326 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4327 amdgpu_ring_write(kiq_ring,
4328 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4329 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4330 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4331 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4332 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4333 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4334 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4335 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4338 amdgpu_ring_commit(kiq_ring);
/*
 * Deactivate the currently selected hardware queue descriptor (HQD):
 * issue a dequeue request of type 'req' and poll CP_HQD_ACTIVE until the
 * queue drains (or adev->usec_timeout expires), then clear the dequeue
 * request and the PQ read/write pointers.  Caller must have selected the
 * target me/pipe/queue via srbm beforehand.
 */
4343 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4347 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4348 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4349 for (i = 0; i < adev->usec_timeout; i++) {
4350 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4354 if (i == adev->usec_timeout)
4357 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4358 WREG32(mmCP_HQD_PQ_RPTR, 0);
4359 WREG32(mmCP_HQD_PQ_WPTR, 0);
/* Populate the MQD (memory queue descriptor) for @ring in CPU-visible
 * memory. Nothing here touches the live HQD registers except reads used
 * to seed default field values; gfx_v8_0_mqd_commit() later writes the
 * MQD into the hardware. Caller holds srbm_mutex with the ring's
 * me/pipe/queue selected (the RREG32s below are per-queue registers).
 */
4364 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4366 	struct amdgpu_device *adev = ring->adev;
4367 	struct vi_mqd *mqd = ring->mqd_ptr;
4368 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	/* static compute-dispatch defaults; enable all CUs on all SEs */
4371 	mqd->header = 0xC0310800;
4372 	mqd->compute_pipelinestat_enable = 0x00000001;
4373 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4374 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4375 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4376 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4377 	mqd->compute_misc_reserved = 0x00000003;
	/* GPU address of the dynamic_cu_mask field inside the MQD allocation */
4378 	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4379 			+ offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4380 	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4381 			+ offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	/* EOP buffer base is programmed in 256-byte units (>> 8) */
4382 	eop_base_addr = ring->eop_gpu_addr >> 8;
4383 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4384 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4386 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4387 	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4388 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4389 			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4391 	mqd->cp_hqd_eop_control = tmp;
4393 	/* enable doorbell? */
4394 	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4395 			CP_HQD_PQ_DOORBELL_CONTROL,
4397 			ring->use_doorbell ? 1 : 0);
4399 	mqd->cp_hqd_pq_doorbell_control = tmp;
4401 	/* set the pointer to the MQD */
4402 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4403 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4405 	/* set MQD vmid to 0 */
4406 	tmp = RREG32(mmCP_MQD_CONTROL);
4407 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4408 	mqd->cp_mqd_control = tmp;
4410 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4411 	hqd_gpu_addr = ring->gpu_addr >> 8;
4412 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4413 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4415 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4416 	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	/* QUEUE_SIZE is log2 of the ring size in dwords, minus 1 */
4417 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4418 			(order_base_2(ring->ring_size / 4) - 1));
4419 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4420 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4422 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4424 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4425 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4426 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4427 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4428 	mqd->cp_hqd_pq_control = tmp;
4430 	/* set the wb address whether it's enabled or not */
4431 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4432 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4433 	mqd->cp_hqd_pq_rptr_report_addr_hi =
4434 		upper_32_bits(wb_gpu_addr) & 0xffff;
4436 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4437 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4438 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4439 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4442 	/* enable the doorbell if requested */
4443 	if (ring->use_doorbell) {
4444 		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4445 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4446 				DOORBELL_OFFSET, ring->doorbell_index);
4448 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4450 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4451 				DOORBELL_SOURCE, 0);
4452 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4456 	mqd->cp_hqd_pq_doorbell_control = tmp;
4458 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4460 	mqd->cp_hqd_pq_wptr = ring->wptr;
4461 	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4463 	/* set the vmid for the queue */
4464 	mqd->cp_hqd_vmid = 0;
4466 	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4467 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4468 	mqd->cp_hqd_persistent_state = tmp;
	/* MTYPE=3 on the IB/IQ-timer/ctx-save controls below; presumably the
	 * required cache type for these accesses on VI — confirm vs. full file */
4471 	tmp = RREG32(mmCP_HQD_IB_CONTROL);
4472 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4473 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4474 	mqd->cp_hqd_ib_control = tmp;
4476 	tmp = RREG32(mmCP_HQD_IQ_TIMER);
4477 	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4478 	mqd->cp_hqd_iq_timer = tmp;
4480 	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4481 	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4482 	mqd->cp_hqd_ctx_save_control = tmp;
	/* snapshot remaining per-queue state from the current registers */
4485 	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4486 	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4487 	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4488 	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4489 	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4490 	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4491 	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4492 	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4493 	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4494 	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4495 	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4496 	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4497 	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4498 	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4499 	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4501 	/* activate the queue */
4502 	mqd->cp_hqd_active = 1;
/* Write a prepared MQD image into the live HQD registers of the queue
 * currently selected via SRBM. Registers are programmed in three passes
 * (VMID..EOP_CONTROL, EOP_EVENTS..ERROR, then BASE_ADDR..ACTIVE last so
 * the queue only goes active once fully programmed). Non-static: also
 * used outside this file (e.g. by amdkfd on gfx8).
 */
4507 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4513 	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4514 	mqd_data = &mqd->cp_mqd_base_addr_lo;
4516 	/* disable wptr polling */
4517 	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4519 	/* program all HQD registers */
4520 	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4521 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4523 	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4524 	 * This is safe since EOP RPTR==WPTR for any inactive HQD
4525 	 * on ASICs that do not support context-save.
4526 	 * EOP writes/reads can start anywhere in the ring.
4528 	if (adev->asic_type != CHIP_TONGA) {
4529 		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4530 		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4531 		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4534 	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4535 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4537 	/* activate the HQD */
4538 	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4539 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
/* Initialize (or re-initialize after GPU reset) the KIQ ring's MQD and
 * commit it to hardware. On reset, the saved MQD backup is restored and
 * re-committed without re-running mqd_init; on first init the MQD is
 * built from scratch and then backed up. The KIQ backup slot is the
 * one past the compute rings (AMDGPU_MAX_COMPUTE_RINGS).
 */
4544 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4546 	struct amdgpu_device *adev = ring->adev;
4547 	struct vi_mqd *mqd = ring->mqd_ptr;
4548 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4550 	gfx_v8_0_kiq_setting(ring);
4552 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
4553 		/* reset MQD to a clean status */
4554 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4555 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4557 		/* reset ring buffer */
4559 		amdgpu_ring_clear_ring(ring);
		/* select this queue's me/pipe/queue before touching HQD regs */
4560 		mutex_lock(&adev->srbm_mutex);
4561 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4562 		gfx_v8_0_mqd_commit(adev, mqd);
4563 		vi_srbm_select(adev, 0, 0, 0, 0);
4564 		mutex_unlock(&adev->srbm_mutex);
		/* first-time init: build MQD from scratch, all CUs/RBs enabled */
4566 		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4567 		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4568 		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4569 		mutex_lock(&adev->srbm_mutex);
4570 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4571 		gfx_v8_0_mqd_init(ring);
4572 		gfx_v8_0_mqd_commit(adev, mqd);
4573 		vi_srbm_select(adev, 0, 0, 0, 0);
4574 		mutex_unlock(&adev->srbm_mutex);
		/* keep a CPU copy so GPU reset can restore a clean MQD */
4576 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4577 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
/* Initialize a compute (KCQ) ring's MQD. Unlike the KIQ path, the MQD
 * is only built here; it is mapped to hardware later via the KIQ
 * MAP_QUEUES packet, so no mqd_commit is done. On GPU reset the backup
 * is restored and the ring buffer cleared instead.
 */
4583 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4585 	struct amdgpu_device *adev = ring->adev;
4586 	struct vi_mqd *mqd = ring->mqd_ptr;
	/* backup slot = index of this ring within adev->gfx.compute_ring[] */
4587 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4589 	if (!adev->in_gpu_reset && !adev->in_suspend) {
4590 		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4591 		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4592 		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4593 		mutex_lock(&adev->srbm_mutex);
4594 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4595 		gfx_v8_0_mqd_init(ring);
4596 		vi_srbm_select(adev, 0, 0, 0, 0);
4597 		mutex_unlock(&adev->srbm_mutex);
		/* save a pristine copy for restore after GPU reset */
4599 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4600 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4601 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4602 		/* reset MQD to a clean status */
4603 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4604 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4605 		/* reset ring buffer */
4607 		amdgpu_ring_clear_ring(ring);
4609 		amdgpu_ring_clear_ring(ring);
/* Program the MEC doorbell aperture (KIQ doorbell .. last MEC ring
 * doorbell, in dword units hence << 2) and enable CP doorbells.
 * The range registers are only written on ASICs newer than Tonga.
 */
4614 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4616 	if (adev->asic_type > CHIP_TONGA) {
4617 		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4618 		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4620 	/* enable doorbells */
4621 	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
/* Bring up the KIQ ring: reserve and kmap its MQD BO, run
 * gfx_v8_0_kiq_init_queue() against the mapped MQD, then unmap/
 * unreserve and mark the ring schedulable.
 */
4624 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4626 	struct amdgpu_ring *ring;
4629 	ring = &adev->gfx.kiq.ring;
4631 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4632 	if (unlikely(r != 0))
4635 	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4636 	if (unlikely(r != 0))
4639 	gfx_v8_0_kiq_init_queue(ring);
4640 	amdgpu_bo_kunmap(ring->mqd_obj);
4641 	ring->mqd_ptr = NULL;
4642 	amdgpu_bo_unreserve(ring->mqd_obj);
4643 	ring->sched.ready = true;
/* Bring up all compute rings: enable the compute CP, initialize each
 * ring's MQD (reserve/kmap/init/kunmap/unreserve per ring), program the
 * MEC doorbell range, then map all KCQs to hardware through the KIQ.
 */
4647 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4649 	struct amdgpu_ring *ring = NULL;
4652 	gfx_v8_0_cp_compute_enable(adev, true);
4654 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4655 		ring = &adev->gfx.compute_ring[i];
4657 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4658 		if (unlikely(r != 0))
4660 		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4662 		r = gfx_v8_0_kcq_init_queue(ring);
4663 		amdgpu_bo_kunmap(ring->mqd_obj);
4664 		ring->mqd_ptr = NULL;
4666 		amdgpu_bo_unreserve(ring->mqd_obj);
4671 	gfx_v8_0_set_mec_doorbell_range(adev);
	/* map the initialized KCQs onto hardware queues via KIQ packets */
4673 	r = gfx_v8_0_kiq_kcq_enable(adev);
/* Ring-test the GFX ring, the KIQ ring, and every compute ring in one
 * place. NOTE(review): the compute-ring helper's return value is not
 * checked here, unlike gfx and kiq — presumably intentional (best
 * effort), confirm against the full source.
 */
4681 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4684 	struct amdgpu_ring *ring;
4686 	/* collect all the ring_tests here, gfx, kiq, compute */
4687 	ring = &adev->gfx.gfx_ring[0];
4688 	r = amdgpu_ring_test_helper(ring);
4692 	ring = &adev->gfx.kiq.ring;
4693 	r = amdgpu_ring_test_helper(ring);
4697 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4698 		ring = &adev->gfx.compute_ring[i];
4699 		amdgpu_ring_test_helper(ring);
/* Resume the whole command processor: KIQ first (needed to map KCQs),
 * then the GFX ring, then the compute queues, then ring-test everything
 * and re-enable the GUI idle interrupt.
 */
4705 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
	/* dGPUs: mask GUI idle interrupts while the CP is being programmed */
4709 	if (!(adev->flags & AMD_IS_APU))
4710 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4712 	r = gfx_v8_0_kiq_resume(adev);
4716 	r = gfx_v8_0_cp_gfx_resume(adev);
4720 	r = gfx_v8_0_kcq_resume(adev);
4724 	r = gfx_v8_0_cp_test_all_rings(adev);
4728 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/* Enable/disable both CP engines (graphics and compute) together. */
4733 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4735 	gfx_v8_0_cp_gfx_enable(adev, enable);
4736 	gfx_v8_0_cp_compute_enable(adev, enable);
/* IP-block hw_init hook: golden registers, constant engine setup,
 * pin the clear-state buffer, resume RLC, then resume the CP.
 */
4739 static int gfx_v8_0_hw_init(void *handle)
4742 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4744 	gfx_v8_0_init_golden_registers(adev);
4745 	gfx_v8_0_constants_init(adev);
4747 	r = gfx_v8_0_csb_vram_pin(adev);
4751 	r = adev->gfx.rlc.funcs->resume(adev);
4755 	r = gfx_v8_0_cp_resume(adev);
/* Tear down all compute queues by submitting one UNMAP_QUEUES packet
 * per KCQ through the KIQ ring (6 dwords each, hence the alloc size),
 * then ring-test the KIQ to confirm the packets were consumed.
 */
4760 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4763 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4765 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4767 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4769 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4770 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4772 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4773 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4774 			PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4775 			PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4776 			PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4777 			PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4778 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4779 		amdgpu_ring_write(kiq_ring, 0);
4780 		amdgpu_ring_write(kiq_ring, 0);
4781 		amdgpu_ring_write(kiq_ring, 0);
4783 	r = amdgpu_ring_test_helper(kiq_ring);
4785 		DRM_ERROR("KCQ disable failed\n");
/* Idle check: GUI must be inactive and GRBM_STATUS2 must read 0x8
 * (presumably the expected idle signature on gfx8 — confirm).
 */
4790 static bool gfx_v8_0_is_idle(void *handle)
4792 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4794 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4795 		|| RREG32(mmGRBM_STATUS2) != 0x8)
/* RLC idle check: GRBM_STATUS2 idle signature only. */
4801 static bool gfx_v8_0_rlc_is_idle(void *handle)
4803 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4805 	if (RREG32(mmGRBM_STATUS2) != 0x8)
/* Poll gfx_v8_0_rlc_is_idle() up to adev->usec_timeout iterations. */
4811 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4814 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4816 	for (i = 0; i < adev->usec_timeout; i++) {
4817 		if (gfx_v8_0_rlc_is_idle(handle))
/* Poll gfx_v8_0_is_idle() up to adev->usec_timeout iterations. */
4825 static int gfx_v8_0_wait_for_idle(void *handle)
4828 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4830 	for (i = 0; i < adev->usec_timeout; i++) {
4831 		if (gfx_v8_0_is_idle(handle))
/* IP-block hw_fini hook: release IRQs, unmap the KCQs, and (bare-metal
 * only) halt CP and RLC under RLC safe mode before unpinning the CSB.
 * SR-IOV guests skip the halt sequence entirely.
 */
4839 static int gfx_v8_0_hw_fini(void *handle)
4841 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4843 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4844 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4846 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4848 	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4850 	/* disable KCQ to avoid CPC touch memory not valid anymore */
4851 	gfx_v8_0_kcq_disable(adev);
4853 	if (amdgpu_sriov_vf(adev)) {
4854 		pr_debug("For SRIOV client, shouldn't do anything.\n");
4857 	amdgpu_gfx_rlc_enter_safe_mode(adev);
	/* only halt CP/RLC if each reports idle first */
4858 	if (!gfx_v8_0_wait_for_idle(adev))
4859 		gfx_v8_0_cp_enable(adev, false);
4861 		pr_err("cp is busy, skip halt cp\n");
4862 	if (!gfx_v8_0_wait_for_rlc_idle(adev))
4863 		adev->gfx.rlc.funcs->stop(adev);
4865 		pr_err("rlc is busy, skip halt rlc\n");
4866 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4868 	gfx_v8_0_csb_vram_unpin(adev);
/* Suspend is simply a full hw teardown. */
4873 static int gfx_v8_0_suspend(void *handle)
4875 	return gfx_v8_0_hw_fini(handle);
/* Resume is simply a full hw re-init. */
4878 static int gfx_v8_0_resume(void *handle)
4880 	return gfx_v8_0_hw_init(handle);
/* Inspect GRBM/SRBM status registers and compute the GRBM/SRBM
 * soft-reset masks needed to recover any busy blocks. The masks are
 * stashed in adev->gfx.{grbm,srbm}_soft_reset for the pre/soft/post
 * reset hooks; returns whether any reset is required.
 */
4883 static bool gfx_v8_0_check_soft_reset(void *handle)
4885 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4886 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	/* GRBM_STATUS: any busy gfx pipeline block forces a CP+GFX reset */
4890 	tmp = RREG32(mmGRBM_STATUS);
4891 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4892 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4893 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4894 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4895 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4896 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4897 		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4898 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4899 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4900 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4901 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4902 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4903 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	/* GRBM_STATUS2: RLC and CP fetcher/compute/gfx busy bits */
4907 	tmp = RREG32(mmGRBM_STATUS2);
4908 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4909 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4910 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4912 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4913 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4914 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4915 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4917 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4919 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4921 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4922 						SOFT_RESET_GRBM, 1);
	/* SRBM_STATUS: pending GRBM requests or busy semaphore block */
4926 	tmp = RREG32(mmSRBM_STATUS);
4927 	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4928 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4929 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4930 	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
4931 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4932 						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
4934 	if (grbm_soft_reset || srbm_soft_reset) {
4935 		adev->gfx.grbm_soft_reset = grbm_soft_reset;
4936 		adev->gfx.srbm_soft_reset = srbm_soft_reset;
4939 		adev->gfx.grbm_soft_reset = 0;
4940 		adev->gfx.srbm_soft_reset = 0;
/* Prepare for soft reset, based on the masks computed by
 * gfx_v8_0_check_soft_reset(): stop the RLC, halt GFX CP parsing if the
 * gfx engine will be reset, and drain/deactivate every compute HQD
 * (dequeue request 2) before halting the compute CP.
 */
4945 static int gfx_v8_0_pre_soft_reset(void *handle)
4947 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4948 	u32 grbm_soft_reset = 0;
4950 	if ((!adev->gfx.grbm_soft_reset) &&
4951 	    (!adev->gfx.srbm_soft_reset))
4954 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
4957 	adev->gfx.rlc.funcs->stop(adev);
4959 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4960 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
4961 		/* Disable GFX parsing/prefetching */
4962 		gfx_v8_0_cp_gfx_enable(adev, false);
4964 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4965 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
4966 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
4967 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
4970 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4971 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
			/* per-queue HQD registers require SRBM selection */
4973 			mutex_lock(&adev->srbm_mutex);
4974 			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4975 			gfx_v8_0_deactivate_hqd(adev, 2);
4976 			vi_srbm_select(adev, 0, 0, 0, 0);
4977 			mutex_unlock(&adev->srbm_mutex);
4979 		/* Disable MEC parsing/prefetching */
4980 		gfx_v8_0_cp_compute_enable(adev, false);
/* Perform the actual soft reset: stall/clear GFX via GMCON_DEBUG,
 * pulse the GRBM and then SRBM soft-reset bits (set, read back, clear,
 * read back), release the GMCON stall, then let things settle.
 */
4986 static int gfx_v8_0_soft_reset(void *handle)
4988 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4989 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4992 	if ((!adev->gfx.grbm_soft_reset) &&
4993 	    (!adev->gfx.srbm_soft_reset))
4996 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
4997 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
	/* stall the gfx memory controller path while resetting */
4999 	if (grbm_soft_reset || srbm_soft_reset) {
5000 		tmp = RREG32(mmGMCON_DEBUG);
5001 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5002 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5003 		WREG32(mmGMCON_DEBUG, tmp);
5007 	if (grbm_soft_reset) {
5008 		tmp = RREG32(mmGRBM_SOFT_RESET);
5009 		tmp |= grbm_soft_reset;
5010 		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5011 		WREG32(mmGRBM_SOFT_RESET, tmp);
		/* read back to post the write before deasserting */
5012 		tmp = RREG32(mmGRBM_SOFT_RESET);
5016 		tmp &= ~grbm_soft_reset;
5017 		WREG32(mmGRBM_SOFT_RESET, tmp);
5018 		tmp = RREG32(mmGRBM_SOFT_RESET);
5021 	if (srbm_soft_reset) {
5022 		tmp = RREG32(mmSRBM_SOFT_RESET);
5023 		tmp |= srbm_soft_reset;
5024 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5025 		WREG32(mmSRBM_SOFT_RESET, tmp);
5026 		tmp = RREG32(mmSRBM_SOFT_RESET);
5030 		tmp &= ~srbm_soft_reset;
5031 		WREG32(mmSRBM_SOFT_RESET, tmp);
5032 		tmp = RREG32(mmSRBM_SOFT_RESET);
	/* release the GMCON stall/clear set above */
5035 	if (grbm_soft_reset || srbm_soft_reset) {
5036 		tmp = RREG32(mmGMCON_DEBUG);
5037 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5038 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5039 		WREG32(mmGMCON_DEBUG, tmp);
5042 	/* Wait a little for things to settle down */
/* Recover after soft reset: re-deactivate compute HQDs, then resume
 * KIQ/KCQ if compute was reset, resume the GFX CP if it was reset,
 * re-run the ring tests and restart the RLC.
 */
5048 static int gfx_v8_0_post_soft_reset(void *handle)
5050 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5051 	u32 grbm_soft_reset = 0;
5053 	if ((!adev->gfx.grbm_soft_reset) &&
5054 	    (!adev->gfx.srbm_soft_reset))
5057 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5059 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5060 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5061 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5062 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5065 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5066 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5068 			mutex_lock(&adev->srbm_mutex);
5069 			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5070 			gfx_v8_0_deactivate_hqd(adev, 2);
5071 			vi_srbm_select(adev, 0, 0, 0, 0);
5072 			mutex_unlock(&adev->srbm_mutex);
5074 		gfx_v8_0_kiq_resume(adev);
5075 		gfx_v8_0_kcq_resume(adev);
5078 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5079 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5080 		gfx_v8_0_cp_gfx_resume(adev);
5082 	gfx_v8_0_cp_test_all_rings(adev);
5084 	adev->gfx.rlc.funcs->start(adev);
5090  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5092  * @adev: amdgpu_device pointer
5094  * Fetches a GPU clock counter snapshot.
5095  * Returns the 64 bit clock counter snapshot.
5097 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
	/* serialize capture: LSB/MSB are latched together by the capture write */
5101 	mutex_lock(&adev->gfx.gpu_clock_mutex);
5102 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5103 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5104 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5105 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
/* Emit WRITE_DATA packets that program this vmid's GDS partition:
 * memory base/size, GWS base+size, and the OA (ordered-append) mask.
 */
5109 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5111 					  uint32_t gds_base, uint32_t gds_size,
5112 					  uint32_t gws_base, uint32_t gws_size,
5113 					  uint32_t oa_base, uint32_t oa_size)
	/* GDS memory base for this vmid */
5116 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5117 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5118 				WRITE_DATA_DST_SEL(0)));
5119 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5120 	amdgpu_ring_write(ring, 0);
5121 	amdgpu_ring_write(ring, gds_base);
	/* GDS memory size for this vmid */
5124 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5125 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5126 				WRITE_DATA_DST_SEL(0)));
5127 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5128 	amdgpu_ring_write(ring, 0);
5129 	amdgpu_ring_write(ring, gds_size);
	/* GWS: size in the SIZE field, base in the low bits */
5132 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5133 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5134 				WRITE_DATA_DST_SEL(0)));
5135 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5136 	amdgpu_ring_write(ring, 0);
5137 	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
	/* OA: contiguous bitmask of oa_size bits starting at oa_base */
5140 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5141 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5142 				WRITE_DATA_DST_SEL(0)));
5143 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5144 	amdgpu_ring_write(ring, 0);
5145 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
/* Read one indexed SQ register for a given simd/wave via the
 * SQ_IND_INDEX/SQ_IND_DATA indirect-access pair (FORCE_READ set).
 */
5148 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5150 	WREG32(mmSQ_IND_INDEX,
5151 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5152 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5153 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5154 		(SQ_IND_INDEX__FORCE_READ_MASK));
5155 	return RREG32(mmSQ_IND_DATA);
/* Read @num consecutive indexed SQ registers starting at @regno for a
 * given simd/wave/thread into @out, using AUTO_INCR so SQ_IND_DATA can
 * be read repeatedly without rewriting the index.
 */
5158 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5159 			   uint32_t wave, uint32_t thread,
5160 			   uint32_t regno, uint32_t num, uint32_t *out)
5162 	WREG32(mmSQ_IND_INDEX,
5163 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5164 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5165 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5166 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5167 		(SQ_IND_INDEX__FORCE_READ_MASK) |
5168 		(SQ_IND_INDEX__AUTO_INCR_MASK));
5170 		*(out++) = RREG32(mmSQ_IND_DATA);
/* Dump the standard debug snapshot of one wave's SQ state into @dst,
 * advancing *no_fields per entry. The leading 0 marks the record type.
 */
5173 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5175 	/* type 0 wave data */
5176 	dst[(*no_fields)++] = 0;
5177 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5178 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5179 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5180 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5181 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5182 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5183 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5184 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5185 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5186 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5187 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5188 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5189 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5190 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5191 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5192 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5193 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5194 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
/* Read @size SGPRs starting at @start for one wave into @dst
 * (thread id 0: SGPRs are per-wave, not per-thread).
 */
5197 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5198 				     uint32_t wave, uint32_t start,
5199 				     uint32_t size, uint32_t *dst)
5202 		adev, simd, wave, 0,
5203 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
/* gfx-core callback table exposed through adev->gfx.funcs. */
5207 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5208 	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5209 	.select_se_sh = &gfx_v8_0_select_se_sh,
5210 	.read_wave_data = &gfx_v8_0_read_wave_data,
5211 	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5212 	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
/* IP-block early_init hook: set ring counts and install the
 * gfx/ring/irq/gds/rlc function tables before any hw access.
 */
5215 static int gfx_v8_0_early_init(void *handle)
5217 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5219 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5220 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5221 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5222 	gfx_v8_0_set_ring_funcs(adev);
5223 	gfx_v8_0_set_irq_funcs(adev);
5224 	gfx_v8_0_set_gds_init(adev);
5225 	gfx_v8_0_set_rlc_funcs(adev);
/* IP-block late_init hook: enable privileged-register/instruction,
 * ECC-error and SQ interrupts, and run the EDC GPR workarounds (which
 * need the IB pool, hence late init).
 */
5230 static int gfx_v8_0_late_init(void *handle)
5232 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5235 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5239 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5243 	/* requires IBs so do in late init after IB pool is initialized */
5244 	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5248 	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5250 		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5254 	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5257 			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
/* Enable/disable static per-CU medium-grain power gating. On
 * Polaris11/12/VegaM this is requested from the SMU via powerplay;
 * the RLC_PG_CNTL bit is set directly as well.
 */
5265 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5268 	if (((adev->asic_type == CHIP_POLARIS11) ||
5269 	    (adev->asic_type == CHIP_POLARIS12) ||
5270 	    (adev->asic_type == CHIP_VEGAM)) &&
5271 	    adev->powerplay.pp_funcs->set_powergating_by_smu)
5272 		/* Send msg to SMU via Powerplay */
5273 		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5275 	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
/* Enable/disable dynamic per-CU power gating via RLC_PG_CNTL. */
5278 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5281 	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
/* Enable/disable Polaris11 "quick" power gating via RLC_PG_CNTL. */
5284 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5287 	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
/* Carrizo/Stoney: enable/disable coarse-grain GFX power gating. */
5290 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5293 	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
/* Carrizo/Stoney: enable/disable GFX pipeline power gating. */
5296 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5299 	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5301 	/* Read any GFX register to wake up GFX. */
5303 	RREG32(mmDB_RENDER_CONTROL);
/* Carrizo/Stoney: apply coarse-grain (and optionally pipeline) GFX
 * power gating according to the pg_flags capability bits and @enable.
 */
5306 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5309 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5310 		cz_enable_gfx_cg_power_gating(adev, true);
5311 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5312 			cz_enable_gfx_pipeline_power_gating(adev, true);
5314 		cz_enable_gfx_cg_power_gating(adev, false);
5315 		cz_enable_gfx_pipeline_power_gating(adev, false);
/* IP-block set_powergating_state hook: apply per-ASIC power-gating
 * features (SCK slowdown, CP PG, coarse-grain, static/dynamic/quick MG)
 * under RLC safe mode. No-op on SR-IOV guests.
 */
5319 static int gfx_v8_0_set_powergating_state(void *handle,
5320 					  enum amd_powergating_state state)
5322 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5323 	bool enable = (state == AMD_PG_STATE_GATE);
5325 	if (amdgpu_sriov_vf(adev))
	/* safe mode is required while reprogramming PG controls */
5328 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5329 				AMD_PG_SUPPORT_RLC_SMU_HS |
5331 				AMD_PG_SUPPORT_GFX_DMG))
5332 		amdgpu_gfx_rlc_enter_safe_mode(adev);
5333 	switch (adev->asic_type) {
5337 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5338 			cz_enable_sck_slow_down_on_power_up(adev, true);
5339 			cz_enable_sck_slow_down_on_power_down(adev, true);
5341 			cz_enable_sck_slow_down_on_power_up(adev, false);
5342 			cz_enable_sck_slow_down_on_power_down(adev, false);
5344 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5345 			cz_enable_cp_power_gating(adev, true);
5347 			cz_enable_cp_power_gating(adev, false);
5349 		cz_update_gfx_cg_power_gating(adev, enable);
5351 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5352 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5354 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5356 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5357 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5359 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5361 	case CHIP_POLARIS11:
5362 	case CHIP_POLARIS12:
5364 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5365 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5367 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5369 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5370 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5372 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5374 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5375 			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5377 			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5382 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5383 				AMD_PG_SUPPORT_RLC_SMU_HS |
5385 				AMD_PG_SUPPORT_GFX_DMG))
5386 		amdgpu_gfx_rlc_exit_safe_mode(adev);
/* IP-block get_clockgating_state hook: read back the CG control
 * registers and OR the corresponding AMD_CG_SUPPORT_GFX_* bits into
 * *flags. Skipped on SR-IOV guests.
 */
5390 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5392 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5395 	if (amdgpu_sriov_vf(adev))
5398 	/* AMD_CG_SUPPORT_GFX_MGCG */
5399 	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5400 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5401 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5403 	/* AMD_CG_SUPPORT_GFX_CGLG */
5404 	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5405 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5406 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5408 	/* AMD_CG_SUPPORT_GFX_CGLS */
5409 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5410 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5412 	/* AMD_CG_SUPPORT_GFX_CGTS */
5413 	data = RREG32(mmCGTS_SM_CTRL_REG);
5414 	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5415 		*flags |= AMD_CG_SUPPORT_GFX_CGTS;
5417 	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
5418 	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5419 		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5421 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5422 	data = RREG32(mmRLC_MEM_SLP_CNTL);
5423 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5424 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5426 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5427 	data = RREG32(mmCP_MEM_SLP_CNTL);
5428 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5429 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
/* Broadcast a serdes command @cmd to register @reg_addr on all CU/SE
 * BPM units via RLC_SERDES_WR_CTRL. Stoney keeps its existing BPM
 * data/addr fields; other ASICs clear them before inserting cmd/addr.
 */
5432 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5433 				     uint32_t reg_addr, uint32_t cmd)
	/* broadcast to every SE/SH and all CU masters */
5437 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5439 	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5440 	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5442 	data = RREG32(mmRLC_SERDES_WR_CTRL);
5443 	if (adev->asic_type == CHIP_STONEY)
5444 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5445 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5446 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5447 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5448 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5449 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5450 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5451 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5452 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5454 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5455 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5456 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5457 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5458 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5459 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5460 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5461 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5462 			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5463 			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5464 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5465 	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5466 		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5467 		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5468 		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5470 	WREG32(mmRLC_SERDES_WR_CTRL, data);
5473 #define MSG_ENTER_RLC_SAFE_MODE 1
5474 #define MSG_EXIT_RLC_SAFE_MODE 0
5475 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5476 #define RLC_GPR_REG2__REQ__SHIFT 0
5477 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5478 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5480 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5482 uint32_t rlc_setting;
5484 rlc_setting = RREG32(mmRLC_CNTL);
5485 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5491 static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5495 data = RREG32(mmRLC_CNTL);
5496 data |= RLC_SAFE_MODE__CMD_MASK;
5497 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5498 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5499 WREG32(mmRLC_SAFE_MODE, data);
5501 /* wait for RLC_SAFE_MODE */
5502 for (i = 0; i < adev->usec_timeout; i++) {
5503 if ((RREG32(mmRLC_GPM_STAT) &
5504 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5505 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5506 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5507 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5511 for (i = 0; i < adev->usec_timeout; i++) {
5512 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5518 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5523 data = RREG32(mmRLC_CNTL);
5524 data |= RLC_SAFE_MODE__CMD_MASK;
5525 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5526 WREG32(mmRLC_SAFE_MODE, data);
5528 for (i = 0; i < adev->usec_timeout; i++) {
5529 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5535 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5536 .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5537 .set_safe_mode = gfx_v8_0_set_safe_mode,
5538 .unset_safe_mode = gfx_v8_0_unset_safe_mode,
5539 .init = gfx_v8_0_rlc_init,
5540 .get_csb_size = gfx_v8_0_get_csb_size,
5541 .get_csb_buffer = gfx_v8_0_get_csb_buffer,
5542 .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5543 .resume = gfx_v8_0_rlc_resume,
5544 .stop = gfx_v8_0_rlc_stop,
5545 .reset = gfx_v8_0_rlc_reset,
5546 .start = gfx_v8_0_rlc_start
5549 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5552 uint32_t temp, data;
5554 amdgpu_gfx_rlc_enter_safe_mode(adev);
5556 /* It is disabled by HW by default */
5557 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5558 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5559 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5560 /* 1 - RLC memory Light sleep */
5561 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5563 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5564 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5567 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5568 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5569 if (adev->flags & AMD_IS_APU)
5570 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5571 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5572 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5574 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5575 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5576 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5577 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5580 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5582 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5583 gfx_v8_0_wait_for_rlc_serdes(adev);
5585 /* 5 - clear mgcg override */
5586 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5588 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5589 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5590 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5591 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5592 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5593 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5594 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5595 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5596 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5597 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5598 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5599 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5601 WREG32(mmCGTS_SM_CTRL_REG, data);
5605 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5606 gfx_v8_0_wait_for_rlc_serdes(adev);
5608 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5609 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5610 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5611 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5612 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5613 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5615 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5617 /* 2 - disable MGLS in RLC */
5618 data = RREG32(mmRLC_MEM_SLP_CNTL);
5619 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5620 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5621 WREG32(mmRLC_MEM_SLP_CNTL, data);
5624 /* 3 - disable MGLS in CP */
5625 data = RREG32(mmCP_MEM_SLP_CNTL);
5626 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5627 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5628 WREG32(mmCP_MEM_SLP_CNTL, data);
5631 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5632 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5633 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5634 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5636 WREG32(mmCGTS_SM_CTRL_REG, data);
5638 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5639 gfx_v8_0_wait_for_rlc_serdes(adev);
5641 /* 6 - set mgcg override */
5642 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5646 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5647 gfx_v8_0_wait_for_rlc_serdes(adev);
5650 amdgpu_gfx_rlc_exit_safe_mode(adev);
5653 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5656 uint32_t temp, temp1, data, data1;
5658 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5660 amdgpu_gfx_rlc_enter_safe_mode(adev);
5662 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5663 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5664 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5666 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5668 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5669 gfx_v8_0_wait_for_rlc_serdes(adev);
5671 /* 2 - clear cgcg override */
5672 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5674 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5675 gfx_v8_0_wait_for_rlc_serdes(adev);
5677 /* 3 - write cmd to set CGLS */
5678 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5680 /* 4 - enable cgcg */
5681 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5683 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5685 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5687 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5688 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5691 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5693 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5697 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5699 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5700 * Cmp_busy/GFX_Idle interrupts
5702 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5704 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5705 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5708 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5709 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5710 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5712 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5714 /* read gfx register to wake up cgcg */
5715 RREG32(mmCB_CGTT_SCLK_CTRL);
5716 RREG32(mmCB_CGTT_SCLK_CTRL);
5717 RREG32(mmCB_CGTT_SCLK_CTRL);
5718 RREG32(mmCB_CGTT_SCLK_CTRL);
5720 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5721 gfx_v8_0_wait_for_rlc_serdes(adev);
5723 /* write cmd to Set CGCG Overrride */
5724 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5726 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5727 gfx_v8_0_wait_for_rlc_serdes(adev);
5729 /* write cmd to Clear CGLS */
5730 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5732 /* disable cgcg, cgls should be disabled too. */
5733 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5734 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5736 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5737 /* enable interrupts again for PG */
5738 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5741 gfx_v8_0_wait_for_rlc_serdes(adev);
5743 amdgpu_gfx_rlc_exit_safe_mode(adev);
5745 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5749 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5750 * === MGCG + MGLS + TS(CG/LS) ===
5752 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5753 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5755 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5756 * === CGCG + CGLS ===
5758 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5759 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5764 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5765 enum amd_clockgating_state state)
5767 uint32_t msg_id, pp_state = 0;
5768 uint32_t pp_support_state = 0;
5770 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5771 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5772 pp_support_state = PP_STATE_SUPPORT_LS;
5773 pp_state = PP_STATE_LS;
5775 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5776 pp_support_state |= PP_STATE_SUPPORT_CG;
5777 pp_state |= PP_STATE_CG;
5779 if (state == AMD_CG_STATE_UNGATE)
5782 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5786 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5787 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5790 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5791 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5792 pp_support_state = PP_STATE_SUPPORT_LS;
5793 pp_state = PP_STATE_LS;
5796 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5797 pp_support_state |= PP_STATE_SUPPORT_CG;
5798 pp_state |= PP_STATE_CG;
5801 if (state == AMD_CG_STATE_UNGATE)
5804 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5808 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5809 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5815 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5816 enum amd_clockgating_state state)
5819 uint32_t msg_id, pp_state = 0;
5820 uint32_t pp_support_state = 0;
5822 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5823 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5824 pp_support_state = PP_STATE_SUPPORT_LS;
5825 pp_state = PP_STATE_LS;
5827 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5828 pp_support_state |= PP_STATE_SUPPORT_CG;
5829 pp_state |= PP_STATE_CG;
5831 if (state == AMD_CG_STATE_UNGATE)
5834 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5838 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5839 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5842 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5843 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5844 pp_support_state = PP_STATE_SUPPORT_LS;
5845 pp_state = PP_STATE_LS;
5847 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5848 pp_support_state |= PP_STATE_SUPPORT_CG;
5849 pp_state |= PP_STATE_CG;
5851 if (state == AMD_CG_STATE_UNGATE)
5854 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5858 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5859 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5862 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5863 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5864 pp_support_state = PP_STATE_SUPPORT_LS;
5865 pp_state = PP_STATE_LS;
5868 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5869 pp_support_state |= PP_STATE_SUPPORT_CG;
5870 pp_state |= PP_STATE_CG;
5873 if (state == AMD_CG_STATE_UNGATE)
5876 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5880 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5881 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5884 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5885 pp_support_state = PP_STATE_SUPPORT_LS;
5887 if (state == AMD_CG_STATE_UNGATE)
5890 pp_state = PP_STATE_LS;
5892 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5896 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5897 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5900 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5901 pp_support_state = PP_STATE_SUPPORT_LS;
5903 if (state == AMD_CG_STATE_UNGATE)
5906 pp_state = PP_STATE_LS;
5907 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5911 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5912 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5918 static int gfx_v8_0_set_clockgating_state(void *handle,
5919 enum amd_clockgating_state state)
5921 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5923 if (amdgpu_sriov_vf(adev))
5926 switch (adev->asic_type) {
5930 gfx_v8_0_update_gfx_clock_gating(adev,
5931 state == AMD_CG_STATE_GATE);
5934 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5936 case CHIP_POLARIS10:
5937 case CHIP_POLARIS11:
5938 case CHIP_POLARIS12:
5940 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5948 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
5950 return ring->adev->wb.wb[ring->rptr_offs];
5953 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5955 struct amdgpu_device *adev = ring->adev;
5957 if (ring->use_doorbell)
5958 /* XXX check if swapping is necessary on BE */
5959 return ring->adev->wb.wb[ring->wptr_offs];
5961 return RREG32(mmCP_RB0_WPTR);
5964 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5966 struct amdgpu_device *adev = ring->adev;
5968 if (ring->use_doorbell) {
5969 /* XXX check if swapping is necessary on BE */
5970 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
5971 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
5973 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5974 (void)RREG32(mmCP_RB0_WPTR);
5978 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5980 u32 ref_and_mask, reg_mem_engine;
5982 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
5983 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
5986 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5989 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5996 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
5997 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6000 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6001 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6002 WAIT_REG_MEM_FUNCTION(3) | /* == */
6004 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6005 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6006 amdgpu_ring_write(ring, ref_and_mask);
6007 amdgpu_ring_write(ring, ref_and_mask);
6008 amdgpu_ring_write(ring, 0x20); /* poll interval */
6011 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6013 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6014 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6017 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6018 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6022 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6023 struct amdgpu_job *job,
6024 struct amdgpu_ib *ib,
6027 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6028 u32 header, control = 0;
6030 if (ib->flags & AMDGPU_IB_FLAG_CE)
6031 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6033 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6035 control |= ib->length_dw | (vmid << 24);
6037 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6038 control |= INDIRECT_BUFFER_PRE_ENB(1);
6040 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6041 gfx_v8_0_ring_emit_de_meta(ring);
6044 amdgpu_ring_write(ring, header);
6045 amdgpu_ring_write(ring,
6049 (ib->gpu_addr & 0xFFFFFFFC));
6050 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6051 amdgpu_ring_write(ring, control);
6054 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6055 struct amdgpu_job *job,
6056 struct amdgpu_ib *ib,
6059 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6060 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6062 /* Currently, there is a high possibility to get wave ID mismatch
6063 * between ME and GDS, leading to a hw deadlock, because ME generates
6064 * different wave IDs than the GDS expects. This situation happens
6065 * randomly when at least 5 compute pipes use GDS ordered append.
6066 * The wave IDs generated by ME are also wrong after suspend/resume.
6067 * Those are probably bugs somewhere else in the kernel driver.
6069 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6070 * GDS to 0 for this ring (me/pipe).
6072 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6073 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6074 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6075 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6078 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6079 amdgpu_ring_write(ring,
6083 (ib->gpu_addr & 0xFFFFFFFC));
6084 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6085 amdgpu_ring_write(ring, control);
6088 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6089 u64 seq, unsigned flags)
6091 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6092 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6094 /* EVENT_WRITE_EOP - flush caches, send int */
6095 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6096 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6098 EOP_TC_WB_ACTION_EN |
6099 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6101 amdgpu_ring_write(ring, addr & 0xfffffffc);
6102 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6103 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6104 amdgpu_ring_write(ring, lower_32_bits(seq));
6105 amdgpu_ring_write(ring, upper_32_bits(seq));
6109 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6111 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6112 uint32_t seq = ring->fence_drv.sync_seq;
6113 uint64_t addr = ring->fence_drv.gpu_addr;
6115 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6116 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6117 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6118 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6119 amdgpu_ring_write(ring, addr & 0xfffffffc);
6120 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6121 amdgpu_ring_write(ring, seq);
6122 amdgpu_ring_write(ring, 0xffffffff);
6123 amdgpu_ring_write(ring, 4); /* poll interval */
6126 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6127 unsigned vmid, uint64_t pd_addr)
6129 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6131 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6133 /* wait for the invalidate to complete */
6134 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6135 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6136 WAIT_REG_MEM_FUNCTION(0) | /* always */
6137 WAIT_REG_MEM_ENGINE(0))); /* me */
6138 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6139 amdgpu_ring_write(ring, 0);
6140 amdgpu_ring_write(ring, 0); /* ref */
6141 amdgpu_ring_write(ring, 0); /* mask */
6142 amdgpu_ring_write(ring, 0x20); /* poll interval */
6144 /* compute doesn't have PFP */
6146 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6147 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6148 amdgpu_ring_write(ring, 0x0);
6152 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6154 return ring->adev->wb.wb[ring->wptr_offs];
6157 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6159 struct amdgpu_device *adev = ring->adev;
6161 /* XXX check if swapping is necessary on BE */
6162 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6163 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6166 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6169 struct amdgpu_device *adev = ring->adev;
6170 int pipe_num, tmp, reg;
6171 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6173 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6175 /* first me only has 2 entries, GFX and HP3D */
6179 reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6181 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6185 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6186 struct amdgpu_ring *ring,
6191 struct amdgpu_ring *iring;
6193 mutex_lock(&adev->gfx.pipe_reserve_mutex);
6194 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
6196 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6198 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6200 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6201 /* Clear all reservations - everyone reacquires all resources */
6202 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6203 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6206 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6207 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6210 /* Lower all pipes without a current reservation */
6211 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6212 iring = &adev->gfx.gfx_ring[i];
6213 pipe = amdgpu_gfx_mec_queue_to_bit(adev,
6217 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6218 gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6221 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6222 iring = &adev->gfx.compute_ring[i];
6223 pipe = amdgpu_gfx_mec_queue_to_bit(adev,
6227 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6228 gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6232 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6235 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6236 struct amdgpu_ring *ring,
6239 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6240 uint32_t queue_priority = acquire ? 0xf : 0x0;
6242 mutex_lock(&adev->srbm_mutex);
6243 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6245 WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6246 WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6248 vi_srbm_select(adev, 0, 0, 0, 0);
6249 mutex_unlock(&adev->srbm_mutex);
6251 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6252 enum drm_sched_priority priority)
6254 struct amdgpu_device *adev = ring->adev;
6255 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6257 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6260 gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6261 gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6264 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6268 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6269 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6271 /* RELEASE_MEM - flush caches, send int */
6272 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6273 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6275 EOP_TC_WB_ACTION_EN |
6276 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6278 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6279 amdgpu_ring_write(ring, addr & 0xfffffffc);
6280 amdgpu_ring_write(ring, upper_32_bits(addr));
6281 amdgpu_ring_write(ring, lower_32_bits(seq));
6282 amdgpu_ring_write(ring, upper_32_bits(seq));
6285 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6286 u64 seq, unsigned int flags)
6288 /* we only allocate 32bit for each seq wb address */
6289 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6291 /* write fence seq to the "addr" */
6292 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6293 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6294 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6295 amdgpu_ring_write(ring, lower_32_bits(addr));
6296 amdgpu_ring_write(ring, upper_32_bits(addr));
6297 amdgpu_ring_write(ring, lower_32_bits(seq));
6299 if (flags & AMDGPU_FENCE_FLAG_INT) {
6300 /* set register to trigger INT */
6301 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6302 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6303 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6304 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6305 amdgpu_ring_write(ring, 0);
6306 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6310 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6312 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6313 amdgpu_ring_write(ring, 0);
6316 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6320 if (amdgpu_sriov_vf(ring->adev))
6321 gfx_v8_0_ring_emit_ce_meta(ring);
6323 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6324 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6325 gfx_v8_0_ring_emit_vgt_flush(ring);
6326 /* set load_global_config & load_global_uconfig */
6328 /* set load_cs_sh_regs */
6330 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6333 /* set load_ce_ram if preamble presented */
6334 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6337 /* still load_ce_ram if this is the first time preamble presented
6338 * although there is no context switch happens.
6340 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6344 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6345 amdgpu_ring_write(ring, dw2);
6346 amdgpu_ring_write(ring, 0);
6349 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6353 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6354 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6355 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6356 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6357 ret = ring->wptr & ring->buf_mask;
6358 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6362 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6366 BUG_ON(offset > ring->buf_mask);
6367 BUG_ON(ring->ring[offset] != 0x55aa55aa);
6369 cur = (ring->wptr & ring->buf_mask) - 1;
6370 if (likely(cur > offset))
6371 ring->ring[offset] = cur - offset;
6373 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6376 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6378 struct amdgpu_device *adev = ring->adev;
6380 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6381 amdgpu_ring_write(ring, 0 | /* src: register*/
6382 (5 << 8) | /* dst: memory */
6383 (1 << 20)); /* write confirm */
6384 amdgpu_ring_write(ring, reg);
6385 amdgpu_ring_write(ring, 0);
6386 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6387 adev->virt.reg_val_offs * 4));
6388 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6389 adev->virt.reg_val_offs * 4));
6392 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6397 switch (ring->funcs->type) {
6398 case AMDGPU_RING_TYPE_GFX:
6399 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6401 case AMDGPU_RING_TYPE_KIQ:
6402 cmd = 1 << 16; /* no inc addr */
6409 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6410 amdgpu_ring_write(ring, cmd);
6411 amdgpu_ring_write(ring, reg);
6412 amdgpu_ring_write(ring, 0);
6413 amdgpu_ring_write(ring, val);
6416 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6418 struct amdgpu_device *adev = ring->adev;
6421 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6422 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6423 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6424 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6425 WREG32(mmSQ_CMD, value);
6428 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6429 enum amdgpu_interrupt_state state)
6431 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6432 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6435 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6437 enum amdgpu_interrupt_state state)
6439 u32 mec_int_cntl, mec_int_cntl_reg;
6442 * amdgpu controls only the first MEC. That's why this function only
6443 * handles the setting of interrupts for this specific MEC. All other
6444 * pipes' interrupts are set by amdkfd.
6450 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6453 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6456 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6459 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6462 DRM_DEBUG("invalid pipe %d\n", pipe);
6466 DRM_DEBUG("invalid me %d\n", me);
6471 case AMDGPU_IRQ_STATE_DISABLE:
6472 mec_int_cntl = RREG32(mec_int_cntl_reg);
6473 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6474 WREG32(mec_int_cntl_reg, mec_int_cntl);
6476 case AMDGPU_IRQ_STATE_ENABLE:
6477 mec_int_cntl = RREG32(mec_int_cntl_reg);
6478 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6479 WREG32(mec_int_cntl_reg, mec_int_cntl);
6486 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6487 struct amdgpu_irq_src *source,
6489 enum amdgpu_interrupt_state state)
6491 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6492 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6497 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6498 struct amdgpu_irq_src *source,
6500 enum amdgpu_interrupt_state state)
6502 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6503 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6508 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6509 struct amdgpu_irq_src *src,
6511 enum amdgpu_interrupt_state state)
6514 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6515 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6517 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6518 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6520 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6521 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6523 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6524 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6526 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6527 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6529 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6530 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6532 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6533 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6535 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6536 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6538 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6539 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
/*
 * Enable/disable the CP ECC-error interrupt across all CP interrupt
 * control registers: the main CP, the three CP rings, the CPC, and
 * every ME1/ME2 compute pipe.  enable_flag is derived from `state`
 * (0 for DISABLE, 1 for ENABLE).
 */
6547 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6548 struct amdgpu_irq_src *source,
6550 enum amdgpu_interrupt_state state)
6555 case AMDGPU_IRQ_STATE_DISABLE:
6559 case AMDGPU_IRQ_STATE_ENABLE:
6567 WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6568 WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6569 WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6570 WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6571 WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6572 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6574 WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6576 WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6578 WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6580 WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6582 WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6584 WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6586 WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
/*
 * Enable/disable the SQ (shader sequencer) interrupt by toggling the
 * STALL field of SQ_INTERRUPT_MSG_CTRL according to `state`.
 */
6592 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6593 struct amdgpu_irq_src *source,
6595 enum amdgpu_interrupt_state state)
6600 case AMDGPU_IRQ_STATE_DISABLE:
6604 case AMDGPU_IRQ_STATE_ENABLE:
6612 WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
/*
 * CP EOP interrupt handler.  Decodes me/pipe/queue from the IH
 * ring_id (me: bits 3:2, pipe: bits 1:0, queue: bits 6:4) and runs
 * fence processing on the matching ring: the single GFX ring for
 * ME0, otherwise the compute ring whose (me, pipe, queue) matches.
 */
6618 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6619 struct amdgpu_irq_src *source,
6620 struct amdgpu_iv_entry *entry)
6623 u8 me_id, pipe_id, queue_id;
6624 struct amdgpu_ring *ring;
6626 DRM_DEBUG("IH: CP EOP\n");
6627 me_id = (entry->ring_id & 0x0c) >> 2;
6628 pipe_id = (entry->ring_id & 0x03) >> 0;
6629 queue_id = (entry->ring_id & 0x70) >> 4;
6633 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6637 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6638 ring = &adev->gfx.compute_ring[i];
6639 /* Per-queue interrupt is supported for MEC starting from VI.
6640 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6642 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6643 amdgpu_fence_process(ring);
/*
 * Common handler for CP fault interrupts (privileged register /
 * instruction errors).  Decodes me/pipe/queue from the IH ring_id
 * the same way as gfx_v8_0_eop_irq() and signals a scheduler fault
 * on the affected ring so its jobs can be cleaned up.
 */
6650 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6651 struct amdgpu_iv_entry *entry)
6653 u8 me_id, pipe_id, queue_id;
6654 struct amdgpu_ring *ring;
6657 me_id = (entry->ring_id & 0x0c) >> 2;
6658 pipe_id = (entry->ring_id & 0x03) >> 0;
6659 queue_id = (entry->ring_id & 0x70) >> 4;
6663 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6667 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6668 ring = &adev->gfx.compute_ring[i];
6669 if (ring->me == me_id && ring->pipe == pipe_id &&
6670 ring->queue == queue_id)
6671 drm_sched_fault(&ring->sched);
/* Privileged-register access fault: log it and fault the offending ring. */
6677 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6678 struct amdgpu_irq_src *source,
6679 struct amdgpu_iv_entry *entry)
6681 DRM_ERROR("Illegal register access in command stream\n");
6682 gfx_v8_0_fault(adev, entry);
/* Privileged/illegal instruction fault: log it and fault the offending ring. */
6686 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6687 struct amdgpu_irq_src *source,
6688 struct amdgpu_iv_entry *entry)
6690 DRM_ERROR("Illegal instruction in command stream\n");
6691 gfx_v8_0_fault(adev, entry);
/*
 * CP EDC/ECC error interrupt: only reports the condition; no recovery
 * action is taken here.
 */
6695 static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6696 struct amdgpu_irq_src *source,
6697 struct amdgpu_iv_entry *entry)
6699 DRM_ERROR("CP EDC/ECC error detected.\n");
/*
 * Decode and report an SQ interrupt payload.
 *
 * The ENCODING field of the IH data selects the report format:
 *  - AUTO (general purpose): dump the overflow/timestamp/thread-trace
 *    status bits.
 *  - WAVE (per-wave trap or EDC/ECC error): identify the SE/SH/CU and
 *    SIMD/wave/VM, and — when not in interrupt context — read the EDC
 *    source from mmSQ_EDC_INFO under grbm_idx_mutex with the SE/SH/CU
 *    selected via gfx_v8_0_select_se_sh().
 * Any other encoding is reported as invalid.
 *
 * Fix: the "invalid encoding" message had its period after the
 * newline ("...type\n."), producing a stray '.' on the next log line.
 */
6703 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6705 u32 enc, se_id, sh_id, cu_id;
6707 int sq_edc_source = -1;
6709 enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6710 se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6714 DRM_INFO("SQ general purpose intr detected:"
6715 "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6716 "host_cmd_overflow %d, cmd_timestamp %d,"
6717 "reg_timestamp %d, thread_trace_buff_full %d,"
6718 "wlt %d, thread_trace %d.\n",
6720 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6721 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6722 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6723 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6724 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6725 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6726 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6727 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6733 cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6734 sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6737 * This function can be called either directly from ISR
6738 * or from BH in which case we can access SQ_EDC_INFO
6742 mutex_lock(&adev->grbm_idx_mutex);
6743 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6745 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6747 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6748 mutex_unlock(&adev->grbm_idx_mutex);
6752 sprintf(type, "instruction intr");
6754 sprintf(type, "EDC/ECC error");
6758 "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6759 "trap %s, sq_ed_info.source %s.\n",
6760 type, se_id, sh_id, cu_id,
6761 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6762 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6763 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6764 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6765 (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6769 DRM_ERROR("SQ invalid encoding type.\n");
/*
 * Bottom-half work item for SQ interrupts: re-parses the saved IH data
 * outside interrupt context so SQ_EDC_INFO can be read (see
 * gfx_v8_0_sq_irq(), which schedules this work).
 */
6773 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6776 struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6777 struct sq_work *sq_work = container_of(work, struct sq_work, work);
6779 gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
/*
 * SQ interrupt top half.  Prefers to defer parsing to the work item so
 * SQ_EDC_INFO can be read from process context; if the previous work
 * submission is still pending, falls back to parsing whatever is
 * available directly in the ISR.
 */
6782 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6783 struct amdgpu_irq_src *source,
6784 struct amdgpu_iv_entry *entry)
6786 unsigned ih_data = entry->src_data[0];
6789 * Try to submit work so SQ_EDC_INFO can be accessed from
6790 * BH. If previous work submission hasn't finished yet
6791 * just print whatever info is possible directly from the ISR.
6793 if (work_pending(&adev->gfx.sq_work.work)) {
6794 gfx_v8_0_parse_sq_irq(adev, ih_data);
6796 adev->gfx.sq_work.ih_data = ih_data;
6797 schedule_work(&adev->gfx.sq_work.work);
/* IP-block lifecycle callbacks for the GFX v8 engine (init/fini,
 * suspend/resume, soft reset, clock/power gating). */
6803 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6805 .early_init = gfx_v8_0_early_init,
6806 .late_init = gfx_v8_0_late_init,
6807 .sw_init = gfx_v8_0_sw_init,
6808 .sw_fini = gfx_v8_0_sw_fini,
6809 .hw_init = gfx_v8_0_hw_init,
6810 .hw_fini = gfx_v8_0_hw_fini,
6811 .suspend = gfx_v8_0_suspend,
6812 .resume = gfx_v8_0_resume,
6813 .is_idle = gfx_v8_0_is_idle,
6814 .wait_for_idle = gfx_v8_0_wait_for_idle,
6815 .check_soft_reset = gfx_v8_0_check_soft_reset,
6816 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6817 .soft_reset = gfx_v8_0_soft_reset,
6818 .post_soft_reset = gfx_v8_0_post_soft_reset,
6819 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6820 .set_powergating_state = gfx_v8_0_set_powergating_state,
6821 .get_clockgating_state = gfx_v8_0_get_clockgating_state,
/* Ring callbacks for the GFX ring (32-bit ring pointers, PM4 NOP
 * padding).  emit_frame_size is the worst-case dword count per frame;
 * each term is annotated with the emitter it accounts for. */
6824 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6825 .type = AMDGPU_RING_TYPE_GFX,
6827 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6828 .support_64bit_ptrs = false,
6829 .get_rptr = gfx_v8_0_ring_get_rptr,
6830 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6831 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6832 .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6834 7 + /* PIPELINE_SYNC */
6835 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6836 8 + /* FENCE for VM_FLUSH */
6837 20 + /* GDS switch */
6838 4 + /* double SWITCH_BUFFER,
6839 the first COND_EXEC jump to the place just
6840 prior to this double SWITCH_BUFFER */
6848 8 + 8 + /* FENCE x2 */
6849 2, /* SWITCH_BUFFER */
6850 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6851 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6852 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6853 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6854 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6855 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6856 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6857 .test_ring = gfx_v8_0_ring_test_ring,
6858 .test_ib = gfx_v8_0_ring_test_ib,
6859 .insert_nop = amdgpu_ring_insert_nop,
6860 .pad_ib = amdgpu_ring_generic_pad_ib,
6861 .emit_switch_buffer = gfx_v8_ring_emit_sb,
6862 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6863 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6864 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6865 .emit_wreg = gfx_v8_0_ring_emit_wreg,
6866 .soft_recovery = gfx_v8_0_ring_soft_recovery,
/* Ring callbacks for the MEC compute rings.  Shares rptr handling with
 * the GFX ring but uses compute-specific wptr, IB and fence emitters,
 * and supports per-queue priority via set_priority. */
6869 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6870 .type = AMDGPU_RING_TYPE_COMPUTE,
6872 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6873 .support_64bit_ptrs = false,
6874 .get_rptr = gfx_v8_0_ring_get_rptr,
6875 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6876 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6878 20 + /* gfx_v8_0_ring_emit_gds_switch */
6879 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6880 5 + /* hdp_invalidate */
6881 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6882 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6883 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6884 .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6885 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6886 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6887 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6888 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6889 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6890 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6891 .test_ring = gfx_v8_0_ring_test_ring,
6892 .test_ib = gfx_v8_0_ring_test_ib,
6893 .insert_nop = amdgpu_ring_insert_nop,
6894 .pad_ib = amdgpu_ring_generic_pad_ib,
6895 .set_priority = gfx_v8_0_ring_set_priority_compute,
6896 .emit_wreg = gfx_v8_0_ring_emit_wreg,
/* Ring callbacks for the KIQ (kernel interface queue).  A reduced
 * compute-style ring: register read/write via emit_rreg/emit_wreg and
 * a KIQ-specific fence; no IB submission or VM-flush callbacks. */
6899 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6900 .type = AMDGPU_RING_TYPE_KIQ,
6902 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6903 .support_64bit_ptrs = false,
6904 .get_rptr = gfx_v8_0_ring_get_rptr,
6905 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6906 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6908 20 + /* gfx_v8_0_ring_emit_gds_switch */
6909 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6910 5 + /* hdp_invalidate */
6911 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6912 17 + /* gfx_v8_0_ring_emit_vm_flush */
6913 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6914 .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6915 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6916 .test_ring = gfx_v8_0_ring_test_ring,
6917 .insert_nop = amdgpu_ring_insert_nop,
6918 .pad_ib = amdgpu_ring_generic_pad_ib,
6919 .emit_rreg = gfx_v8_0_ring_emit_rreg,
6920 .emit_wreg = gfx_v8_0_ring_emit_wreg,
/* Attach the appropriate funcs table to the KIQ, GFX and compute rings. */
6923 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6927 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6929 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6930 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6932 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6933 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
/* IRQ source: CP end-of-pipe interrupts. */
6936 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6937 .set = gfx_v8_0_set_eop_interrupt_state,
6938 .process = gfx_v8_0_eop_irq,
/* IRQ source: privileged-register access faults. */
6941 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6942 .set = gfx_v8_0_set_priv_reg_fault_state,
6943 .process = gfx_v8_0_priv_reg_irq,
/* IRQ source: privileged/illegal instruction faults. */
6946 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6947 .set = gfx_v8_0_set_priv_inst_fault_state,
6948 .process = gfx_v8_0_priv_inst_irq,
/* IRQ source: CP EDC/ECC errors. */
6951 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
6952 .set = gfx_v8_0_set_cp_ecc_int_state,
6953 .process = gfx_v8_0_cp_ecc_error_irq,
/* IRQ source: SQ (shader sequencer) interrupts. */
6956 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
6957 .set = gfx_v8_0_set_sq_int_state,
6958 .process = gfx_v8_0_sq_irq,
/*
 * Register the GFX8 interrupt sources.  EOP gets one type per CP ring
 * (AMDGPU_CP_IRQ_LAST); the fault/error/SQ sources each have a single
 * type.
 */
6961 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6963 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6964 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6966 adev->gfx.priv_reg_irq.num_types = 1;
6967 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6969 adev->gfx.priv_inst_irq.num_types = 1;
6970 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6972 adev->gfx.cp_ecc_error_irq.num_types = 1;
6973 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
6975 adev->gfx.sq_irq.num_types = 1;
6976 adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
/* Install the RLC callbacks (the iceland table is shared by all VI parts). */
6979 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6981 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
/* Initialize GDS sizes from hardware: GDS size is read from the VMID0
 * register; GWS/OA sizes are fixed for this ASIC family. */
6984 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6986 /* init ASIC gds info */
6987 adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
6988 adev->gds.gws_size = 64;
6989 adev->gds.oa_size = 16;
6990 adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
6993 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7001 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7002 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7004 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
/*
 * Return the active-CU bitmap for the currently selected SE/SH:
 * OR the hardware-fused and user-disabled INACTIVE_CUS fields, invert,
 * and mask to max_cu_per_sh bits.
 */
7007 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7011 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7012 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7014 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7016 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
/*
 * Populate adev->gfx.cu_info by walking every SE/SH pair: select it
 * via GRBM (under grbm_idx_mutex), apply the module-level CU-disable
 * masks, read the active-CU bitmap, and accumulate per-SH bitmaps,
 * the always-on (AO) CU mask and the total active CU count.  The
 * GRBM selection is restored to broadcast before releasing the lock.
 * Fixed per-CU capabilities (SIMDs, waves, scratch slots, wavefront
 * and LDS size) are filled in at the end.
 */
7019 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7021 int i, j, k, counter, active_cu_number = 0;
7022 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7023 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7024 unsigned disable_masks[4 * 2];
7027 memset(cu_info, 0, sizeof(*cu_info));
7029 if (adev->flags & AMD_IS_APU)
7032 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7034 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7036 mutex_lock(&adev->grbm_idx_mutex);
7037 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7038 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7042 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7044 gfx_v8_0_set_user_cu_inactive_bitmap(
7045 adev, disable_masks[i * 2 + j]);
7046 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7047 cu_info->bitmap[i][j] = bitmap;
7049 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7050 if (bitmap & mask) {
7051 if (counter < ao_cu_num)
7057 active_cu_number += counter;
7059 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7060 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7063 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7064 mutex_unlock(&adev->grbm_idx_mutex);
7066 cu_info->number = active_cu_number;
7067 cu_info->ao_cu_mask = ao_cu_mask;
7068 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7069 cu_info->max_waves_per_simd = 10;
7070 cu_info->max_scratch_slots_per_cu = 32;
7071 cu_info->wave_front_size = 64;
7072 cu_info->lds_size = 64;
/* Exported IP block descriptor for GFX v8.0 ASICs. */
7075 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7077 .type = AMD_IP_BLOCK_TYPE_GFX,
7081 .funcs = &gfx_v8_0_ip_funcs,
/* Exported IP block descriptor for GFX v8.1 ASICs (same callbacks,
 * different version number). */
7084 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7086 .type = AMD_IP_BLOCK_TYPE_GFX,
7090 .funcs = &gfx_v8_0_ip_funcs,
/*
 * Emit the CE metadata WRITE_DATA packet for world-switch preemption.
 * The payload destination and size depend on whether chained IBs are
 * supported (SR-IOV): the address is the ce_payload offset inside the
 * CSA, and cnt_ce covers the payload dwords plus the packet header
 * minus the two address dwords written separately below.
 */
7093 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7095 uint64_t ce_payload_addr;
7098 struct vi_ce_ib_state regular;
7099 struct vi_ce_ib_state_chained_ib chained;
7102 if (ring->adev->virt.chained_ib_support) {
7103 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7104 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7105 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7107 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7108 offsetof(struct vi_gfx_meta_data, ce_payload);
7109 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7112 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7113 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7114 WRITE_DATA_DST_SEL(8) |
7116 WRITE_DATA_CACHE_POLICY(0));
7117 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7118 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7119 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7122 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7124 uint64_t de_payload_addr, gds_addr, csa_addr;
7127 struct vi_de_ib_state regular;
7128 struct vi_de_ib_state_chained_ib chained;
7131 csa_addr = amdgpu_csa_vaddr(ring->adev);
7132 gds_addr = csa_addr + 4096;
7133 if (ring->adev->virt.chained_ib_support) {
7134 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7135 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7136 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7137 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7139 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7140 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7141 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7142 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7145 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7146 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7147 WRITE_DATA_DST_SEL(8) |
7149 WRITE_DATA_CACHE_POLICY(0));
7150 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7151 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7152 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);