/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
31 #include "amdgpu_gfx.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
40 #include "vega10_enum.h"
41 #include "hdp/hdp_4_0_offset.h"
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
49 #include "amdgpu_ras.h"
/* Number of GFX (graphics) rings exposed by this IP block. */
#define GFX9_NUM_GFX_RINGS 1
/* Size in bytes of one MEC hardware queue descriptor (HPD) slot. */
#define GFX9_MEC_HPD_SIZE 4096
/* Load address / save-restore offset used when loading RLC ucode. */
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
/* PWR_MISC_CNTL_STATUS is not in the gc_9_0 headers, so it is defined
 * locally here.  NOTE(review): offset/mask values assumed correct for this
 * IP revision — confirm against the register spec. */
#define mmPWR_MISC_CNTL_STATUS 0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L
/* GCEA probe map register, used by the Renoir golden settings below. */
#define mmGCEA_PROBE_MAP 0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX 0
/* Arcturus-specific TCP channel-steering registers (not in the common
 * gc_9_0 headers), used by golden_settings_gc_9_4_1_arct below. */
#define mmTCP_CHAN_STEER_0_ARCT 0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_1_ARCT 0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_2_ARCT 0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_3_ARCT 0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_4_ARCT 0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_5_ARCT 0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0
/*
 * Enumeration of every GFX RAS sub-block the TA firmware can address.
 * Sub-blocks are grouped into per-IP index ranges; each range is delimited
 * by *_INDEX_START / *_INDEX_END aliases so range membership can be tested
 * with simple comparisons.  Fix: the closing "};" was missing.
 */
enum ta_ras_gfx_subblock {
	/* CPC */
	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
	TA_RAS_BLOCK__GFX_CPC_UCODE,
	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	/* CPF */
	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	TA_RAS_BLOCK__GFX_CPF_TAG,
	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
	/* CPG */
	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
	TA_RAS_BLOCK__GFX_CPG_TAG,
	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
	/* GDS */
	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	/* SPI */
	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
	/* SQ */
	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_LDS_D,
	TA_RAS_BLOCK__GFX_SQ_LDS_I,
	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
	/* SQC (3 sub-ranges) */
	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0 */
	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1 */
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2 */
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA */
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA */
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0 */
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1 */
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2 */
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3 */
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4 */
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI */
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP */
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD */
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0 */
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1 */
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2 */
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank cache */
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker */
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};
/*
 * Descriptor for one GFX RAS sub-block: its name, its TA enum value, and
 * bitmasks of the error types supported in hardware and software.
 * Fix: the "name"/"ta_subblock" field lines, the "#subblock," macro line
 * and both closing braces were missing.
 * NOTE(review): field layout restored to match the initializers produced by
 * AMDGPU_RAS_SUB_BLOCK below — confirm against upstream gfx_v9_0.c.
 */
struct ras_gfx_subblock {
	char *name;			/* stringified sub-block name */
	int ta_subblock;		/* ta_ras_gfx_subblock value */
	int hw_supported_error_type;	/* bitmask built from args a..d */
	int sw_supported_error_type;	/* bitmask built from args e..h */
};

#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
	[AMDGPU_RAS_BLOCK__##subblock] = { \
		#subblock, \
		TA_RAS_BLOCK__##subblock, \
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \
	}
300 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
301 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
302 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
303 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
304 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
305 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
306 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
307 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
308 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
309 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
310 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
311 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
312 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
313 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
314 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
315 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
316 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
317 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
319 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
321 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
322 AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
323 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
324 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
325 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
326 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
327 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
328 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
330 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
332 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
334 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
336 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
338 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
340 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
342 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
344 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
346 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
348 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
350 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
352 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
354 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
356 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
358 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
360 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
362 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
364 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
366 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
368 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
370 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
372 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
374 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
376 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
377 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
378 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
379 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
380 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
381 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
382 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
383 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
384 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
386 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
388 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
390 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
392 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
394 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
395 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
396 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
397 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
398 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
399 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
400 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
401 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
402 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
403 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
404 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
405 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
407 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
408 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
410 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
412 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
414 AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
415 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
416 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
417 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
418 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
419 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
420 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
421 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
422 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
423 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
424 AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
425 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
426 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
427 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
428 AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
429 AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
430 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
431 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
432 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
433 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
434 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
435 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
436 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
437 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
438 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
439 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
440 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
441 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
442 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
443 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
444 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
445 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
446 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
447 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
450 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
452 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
453 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
454 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
455 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
456 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
457 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
458 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
459 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
460 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
461 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
462 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
463 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
464 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
465 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
466 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
467 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
468 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
469 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
470 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
471 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
474 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
476 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
477 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
478 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
479 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
480 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
481 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
482 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
483 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
484 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
485 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
486 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
487 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
488 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
489 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
490 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
491 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
492 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
493 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
496 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
498 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
499 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
500 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
501 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
502 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
503 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
504 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
505 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
506 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
507 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
508 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
511 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
513 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
514 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
515 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
516 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
517 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
518 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
519 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
520 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
521 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
522 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
523 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
524 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
525 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
526 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
527 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
528 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
529 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
530 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
531 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
532 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
533 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
534 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
535 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
536 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
539 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
541 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
542 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
543 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
544 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
545 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
546 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
547 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
550 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
552 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
553 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
554 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
555 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
556 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
557 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
558 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
559 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
560 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
561 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
562 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
563 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
564 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
565 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
566 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
567 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
568 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
569 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
570 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
573 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
575 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
576 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
577 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
578 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
579 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
580 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
581 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
582 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
583 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
584 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
585 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
586 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
589 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
591 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
592 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
593 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
596 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
598 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
599 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
600 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
601 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
602 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
603 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
604 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
605 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
606 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
607 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
608 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
609 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
610 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
611 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
612 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
613 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
616 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
618 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
619 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
620 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
621 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
622 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
623 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
624 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
625 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
626 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
627 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
628 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
629 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
630 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
/* Golden register overrides for Arcturus (GC 9.4.1); note the ARCT-specific
 * TCP channel steering registers not present on other GFX9 parts.
 */
633 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
635 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
636 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
637 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
638 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
639 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
640 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
641 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
642 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
643 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
644 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
/* Registers that must be written through the RLCG indirect path (see
 * gfx_v9_0_rlcg_wreg) rather than direct MMIO — presumably because the
 * RLC owns them under SR-IOV; confirm against the virtualization docs.
 */
647 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
648 {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
649 {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
/* Offsets of RLC_SRM_INDEX_CNTL_ADDR_<n> relative to _ADDR_0 so the
 * registers can be addressed as ADDR_0 + offset[n]; entry 0 is 0 by
 * construction.
 */
652 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
654 mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
655 mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
656 mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
657 mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
658 mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
659 mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
660 mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
661 mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
/* Same scheme as the ADDR table above, for RLC_SRM_INDEX_CNTL_DATA_<n>. */
664 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
666 mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
667 mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
668 mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
669 mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
670 mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
671 mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
672 mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
673 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
/*
 * gfx_v9_0_rlcg_wreg - write a GC register indirectly via the RLC (SR-IOV)
 * @adev:   amdgpu device
 * @offset: dword register offset to write
 * @v:      value to write
 *
 * Posts the (offset, value) pair into SCRATCH_REG0/1, rings RLC_SPARE_INT,
 * and polls until the RLC firmware clears bit 31 of SCRATCH_REG1 to ack the
 * write. GRBM_GFX_CNTL/GRBM_GFX_INDEX are instead mirrored into
 * SCRATCH_REG2/3 alongside a direct MMIO write.
 */
676 void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
/* NOTE(review): these are function-local statics, recomputed on every call;
 * presumably kept static only to avoid stack churn — confirm intent.
 */
678 static void *scratch_reg0;
679 static void *scratch_reg1;
680 static void *scratch_reg2;
681 static void *scratch_reg3;
682 static void *spare_int;
683 static uint32_t grbm_cntl;
684 static uint32_t grbm_idx;
/* Resolve MMIO addresses of the scratch registers and the RLC doorbell.
 * SCRATCH_REG2/3 intentionally use the REG1 base index (same base block).
 */
686 scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
687 scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
688 scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
689 scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
690 spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
692 grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
693 grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
/* RLCG writes are not allowed while the VF is in runtime. */
695 if (amdgpu_sriov_runtime(adev)) {
696 pr_err("shouldn't call rlcg write register during runtime\n")
/* GRBM select registers: mirror the value into scratch and write direct. */
700 if (offset == grbm_cntl || offset == grbm_idx) {
701 if (offset == grbm_cntl)
702 writel(v, scratch_reg2);
703 else if (offset == grbm_idx)
704 writel(v, scratch_reg3);
706 writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
709 uint32_t retries = 50000;
/* Hand the write to the RLC: value, then offset with bit 31 as the
 * "pending" flag, then kick RLC_SPARE_INT.
 */
711 writel(v, scratch_reg0);
712 writel(offset | 0x80000000, scratch_reg1);
713 writel(1, spare_int);
714 for (i = 0; i < retries; i++) {
717 tmp = readl(scratch_reg1);
/* RLC clears bit 31 once it has performed the write. */
718 if (!(tmp & 0x80000000))
724 pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
729 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
730 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
731 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
732 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
734 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
735 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
736 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
737 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
738 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
739 struct amdgpu_cu_info *cu_info);
740 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
741 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
742 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
743 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
744 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
745 void *ras_error_status);
746 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
748 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
750 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
753 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
754 amdgpu_ring_write(kiq_ring,
755 PACKET3_SET_RESOURCES_VMID_MASK(0) |
756 /* vmid_mask:0* queue_type:0 (KIQ) */
757 PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
758 amdgpu_ring_write(kiq_ring,
759 lower_32_bits(queue_mask)); /* queue mask lo */
760 amdgpu_ring_write(kiq_ring,
761 upper_32_bits(queue_mask)); /* queue mask hi */
762 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
763 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
764 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
765 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
768 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
769 struct amdgpu_ring *ring)
771 struct amdgpu_device *adev = kiq_ring->adev;
772 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
773 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
774 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
776 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
777 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
778 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
779 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
780 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
781 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
782 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
783 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
784 /*queue_type: normal compute queue */
785 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
786 /* alloc format: all_on_one_pipe */
787 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
788 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
789 /* num_queues: must be 1 */
790 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
791 amdgpu_ring_write(kiq_ring,
792 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
793 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
794 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
795 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
796 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
799 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
800 struct amdgpu_ring *ring,
801 enum amdgpu_unmap_queues_action action,
802 u64 gpu_addr, u64 seq)
804 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
806 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
807 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
808 PACKET3_UNMAP_QUEUES_ACTION(action) |
809 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
810 PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
811 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
812 amdgpu_ring_write(kiq_ring,
813 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
815 if (action == PREEMPT_QUEUES_NO_UNMAP) {
816 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
817 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
818 amdgpu_ring_write(kiq_ring, seq);
820 amdgpu_ring_write(kiq_ring, 0);
821 amdgpu_ring_write(kiq_ring, 0);
822 amdgpu_ring_write(kiq_ring, 0);
826 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
827 struct amdgpu_ring *ring,
831 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
833 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
834 amdgpu_ring_write(kiq_ring,
835 PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
836 PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
837 PACKET3_QUERY_STATUS_COMMAND(2));
838 /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
839 amdgpu_ring_write(kiq_ring,
840 PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
841 PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
842 amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
843 amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
844 amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
845 amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
848 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
849 uint16_t pasid, uint32_t flush_type,
852 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
853 amdgpu_ring_write(kiq_ring,
854 PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
855 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
856 PACKET3_INVALIDATE_TLBS_PASID(pasid) |
857 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
/* KIQ PM4 packet emitters for GFX v9; the *_size fields are the dword
 * counts each emitter writes, used by callers to reserve ring space.
 */
860 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
861 .kiq_set_resources = gfx_v9_0_kiq_set_resources,
862 .kiq_map_queues = gfx_v9_0_kiq_map_queues,
863 .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
864 .kiq_query_status = gfx_v9_0_kiq_query_status,
865 .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
866 .set_resources_size = 8,
867 .map_queues_size = 7,
868 .unmap_queues_size = 6,
869 .query_status_size = 7,
870 .invalidate_tlbs_size = 2,
/* Install the GFX v9 KIQ PM4 helpers on the device's KIQ. */
873 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
875 adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
/*
 * gfx_v9_0_init_golden_registers - program per-ASIC golden register tables
 * @adev: amdgpu device
 *
 * Applies the base + chip-specific golden settings for the detected ASIC,
 * then the GFX9-common table for everything except Arcturus. Raven picks
 * the rv2 table for rev_id >= 8, else rv1; Renoir returns early since it
 * does not take the common settings.
 */
878 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
880 switch (adev->asic_type) {
882 soc15_program_register_sequence(adev,
883 golden_settings_gc_9_0,
884 ARRAY_SIZE(golden_settings_gc_9_0));
885 soc15_program_register_sequence(adev,
886 golden_settings_gc_9_0_vg10,
887 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
890 soc15_program_register_sequence(adev,
891 golden_settings_gc_9_2_1,
892 ARRAY_SIZE(golden_settings_gc_9_2_1));
893 soc15_program_register_sequence(adev,
894 golden_settings_gc_9_2_1_vg12,
895 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
898 soc15_program_register_sequence(adev,
899 golden_settings_gc_9_0,
900 ARRAY_SIZE(golden_settings_gc_9_0));
901 soc15_program_register_sequence(adev,
902 golden_settings_gc_9_0_vg20,
903 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
906 soc15_program_register_sequence(adev,
907 golden_settings_gc_9_4_1_arct,
908 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
911 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
912 ARRAY_SIZE(golden_settings_gc_9_1));
/* rev_id >= 8 means Raven2 silicon; otherwise original Raven/Picasso. */
913 if (adev->rev_id >= 8)
914 soc15_program_register_sequence(adev,
915 golden_settings_gc_9_1_rv2,
916 ARRAY_SIZE(golden_settings_gc_9_1_rv2));
918 soc15_program_register_sequence(adev,
919 golden_settings_gc_9_1_rv1,
920 ARRAY_SIZE(golden_settings_gc_9_1_rv1));
923 soc15_program_register_sequence(adev,
924 golden_settings_gc_9_1_rn,
925 ARRAY_SIZE(golden_settings_gc_9_1_rn));
926 return; /* for renoir, don't need common goldensetting */
/* Arcturus skips the GFX9-common table. */
931 if (adev->asic_type != CHIP_ARCTURUS)
932 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
933 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
/* Set up the 8-register CP scratch pool used by ring/IB tests; the free
 * mask starts with all registers available.
 */
936 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
938 adev->gfx.scratch.num_reg = 8;
939 adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
940 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
943 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
944 bool wc, uint32_t reg, uint32_t val)
946 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
947 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
948 WRITE_DATA_DST_SEL(0) |
949 (wc ? WR_CONFIRM : 0));
950 amdgpu_ring_write(ring, reg);
951 amdgpu_ring_write(ring, 0);
952 amdgpu_ring_write(ring, val);
/*
 * gfx_v9_0_wait_reg_mem - emit a WAIT_REG_MEM packet
 * @ring:      ring to emit on
 * @eng_sel:   engine performing the wait
 * @mem_space: 1 = wait on memory, 0 = wait on a register
 * @opt:       operation field (wait)
 * @addr0:     register offset or memory address low bits
 * @addr1:     second register offset or memory address high bits
 * @ref:       reference value to compare against
 * @mask:      mask applied before comparison
 *
 * Function 3 means "wait until (value & mask) == ref". The final dword is
 * the poll interval.
 */
955 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
956 int mem_space, int opt, uint32_t addr0,
957 uint32_t addr1, uint32_t ref, uint32_t mask,
960 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
961 amdgpu_ring_write(ring,
962 /* memory (1) or register (0) */
963 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
964 WAIT_REG_MEM_OPERATION(opt) | /* wait */
965 WAIT_REG_MEM_FUNCTION(3) | /* equal */
966 WAIT_REG_MEM_ENGINE(eng_sel)));
/* Memory addresses fed to the CP must be dword-aligned. */
969 BUG_ON(addr0 & 0x3); /* Dword align */
970 amdgpu_ring_write(ring, addr0);
971 amdgpu_ring_write(ring, addr1);
972 amdgpu_ring_write(ring, ref);
973 amdgpu_ring_write(ring, mask);
974 amdgpu_ring_write(ring, inv); /* poll interval */
/*
 * gfx_v9_0_ring_test_ring - smoke-test a ring with a scratch register write
 * @ring: ring to test
 *
 * Seeds a scratch register with 0xCAFEDEAD, submits a SET_UCONFIG_REG
 * packet writing 0xDEADBEEF to it, and polls until the value lands or
 * adev->usec_timeout expires. Returns 0 on success, negative errno on
 * failure; the scratch register is freed on all paths.
 */
977 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
979 struct amdgpu_device *adev = ring->adev;
985 r = amdgpu_gfx_scratch_get(adev, &scratch);
/* Sentinel value so we can tell when the GPU write arrives. */
989 WREG32(scratch, 0xCAFEDEAD);
990 r = amdgpu_ring_alloc(ring, 3);
992 goto error_free_scratch;
994 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
995 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
996 amdgpu_ring_write(ring, 0xDEADBEEF);
997 amdgpu_ring_commit(ring);
999 for (i = 0; i < adev->usec_timeout; i++) {
1000 tmp = RREG32(scratch);
1001 if (tmp == 0xDEADBEEF)
/* Loop ran out without seeing the write: the ring is dead. */
1006 if (i >= adev->usec_timeout)
1010 amdgpu_gfx_scratch_free(adev, scratch);
/*
 * gfx_v9_0_ring_test_ib - smoke-test indirect buffer execution
 * @ring:    ring to test
 * @timeout: fence wait timeout in jiffies
 *
 * Allocates a writeback slot seeded with 0xCAFEDEAD, submits a small IB
 * whose WRITE_DATA packet stores 0xDEADBEEF to it, then waits on the
 * fence and checks the memory. Returns 0 on success, negative errno on
 * failure; IB and writeback slot are released on all paths.
 */
1014 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1016 struct amdgpu_device *adev = ring->adev;
1017 struct amdgpu_ib ib;
1018 struct dma_fence *f = NULL;
1025 r = amdgpu_device_wb_get(adev, &index);
1029 gpu_addr = adev->wb.gpu_addr + (index * 4);
1030 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1031 memset(&ib, 0, sizeof(ib));
1032 r = amdgpu_ib_get(adev, NULL, 16, &ib);
/* dst_sel 5 = memory with write-confirm; target is the wb slot. */
1036 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1037 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1038 ib.ptr[2] = lower_32_bits(gpu_addr);
1039 ib.ptr[3] = upper_32_bits(gpu_addr);
1040 ib.ptr[4] = 0xDEADBEEF;
1043 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1047 r = dma_fence_wait_timeout(f, false, timeout);
1055 tmp = adev->wb.wb[index];
1056 if (tmp == 0xDEADBEEF)
1062 amdgpu_ib_free(adev, &ib, NULL);
1065 amdgpu_device_wb_free(adev, index);
/*
 * gfx_v9_0_free_microcode - release all GFX firmware images
 * @adev: amdgpu device
 *
 * Drops every CP/RLC/MEC firmware reference (release_firmware(NULL) is a
 * no-op, so unloaded images are safe) and frees the RLC register-list
 * buffer allocated by gfx_v9_0_init_rlc_microcode().
 */
1070 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1072 release_firmware(adev->gfx.pfp_fw);
1073 adev->gfx.pfp_fw = NULL;
1074 release_firmware(adev->gfx.me_fw);
1075 adev->gfx.me_fw = NULL;
1076 release_firmware(adev->gfx.ce_fw);
1077 adev->gfx.ce_fw = NULL;
1078 release_firmware(adev->gfx.rlc_fw);
1079 adev->gfx.rlc_fw = NULL;
1080 release_firmware(adev->gfx.mec_fw);
1081 adev->gfx.mec_fw = NULL;
1082 release_firmware(adev->gfx.mec2_fw);
1083 adev->gfx.mec2_fw = NULL;
1085 kfree(adev->gfx.rlc.register_list_format);
/*
 * gfx_v9_0_init_rlc_ext_microcode - parse RLC v2.1 extended firmware header
 * @adev: amdgpu device (adev->gfx.rlc_fw must already be loaded)
 *
 * Caches the versions, sizes and data pointers of the three save/restore
 * lists (CNTL, GPM, SRM) embedded in the v2.1 RLC image. The list pointers
 * alias into the firmware blob, so they stay valid only while rlc_fw is
 * held.
 */
1088 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1090 const struct rlc_firmware_header_v2_1 *rlc_hdr;
1092 rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1093 adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1094 adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1095 adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1096 adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1097 adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1098 adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1099 adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1100 adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1101 adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1102 adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1103 adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1104 adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1105 adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1106 le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
/*
 * gfx_v9_0_check_fw_write_wait - detect REG_WRITE_REG_WAIT packet support
 * @adev: amdgpu device (firmware versions must be parsed already)
 *
 * Sets me_fw_write_wait/mec_fw_write_wait when the loaded CP firmware is
 * new enough to support fused write+wait packets, and warns once if the
 * firmware predates the minimum recommended versions. Thresholds differ
 * per ASIC (the missing case labels select the per-chip branches below).
 */
1109 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1111 adev->gfx.me_fw_write_wait = false;
1112 adev->gfx.mec_fw_write_wait = false;
/* Warn about old CP firmware on everything except Arcturus. */
1114 if ((adev->asic_type != CHIP_ARCTURUS) &&
1115 ((adev->gfx.mec_fw_version < 0x000001a5) ||
1116 (adev->gfx.mec_feature_version < 46) ||
1117 (adev->gfx.pfp_fw_version < 0x000000b7) ||
1118 (adev->gfx.pfp_feature_version < 46)))
1119 DRM_WARN_ONCE("CP firmware version too old, please update!");
1121 switch (adev->asic_type) {
1123 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1124 (adev->gfx.me_feature_version >= 42) &&
1125 (adev->gfx.pfp_fw_version >= 0x000000b1) &&
1126 (adev->gfx.pfp_feature_version >= 42))
1127 adev->gfx.me_fw_write_wait = true;
1129 if ((adev->gfx.mec_fw_version >= 0x00000193) &&
1130 (adev->gfx.mec_feature_version >= 42))
1131 adev->gfx.mec_fw_write_wait = true;
1134 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1135 (adev->gfx.me_feature_version >= 44) &&
1136 (adev->gfx.pfp_fw_version >= 0x000000b2) &&
1137 (adev->gfx.pfp_feature_version >= 44))
1138 adev->gfx.me_fw_write_wait = true;
1140 if ((adev->gfx.mec_fw_version >= 0x00000196) &&
1141 (adev->gfx.mec_feature_version >= 44))
1142 adev->gfx.mec_fw_write_wait = true;
1145 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1146 (adev->gfx.me_feature_version >= 44) &&
1147 (adev->gfx.pfp_fw_version >= 0x000000b2) &&
1148 (adev->gfx.pfp_feature_version >= 44))
1149 adev->gfx.me_fw_write_wait = true;
1151 if ((adev->gfx.mec_fw_version >= 0x00000197) &&
1152 (adev->gfx.mec_feature_version >= 44))
1153 adev->gfx.mec_fw_write_wait = true;
1156 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1157 (adev->gfx.me_feature_version >= 42) &&
1158 (adev->gfx.pfp_fw_version >= 0x000000b1) &&
1159 (adev->gfx.pfp_feature_version >= 42))
1160 adev->gfx.me_fw_write_wait = true;
1162 if ((adev->gfx.mec_fw_version >= 0x00000192) &&
1163 (adev->gfx.mec_feature_version >= 42))
1164 adev->gfx.mec_fw_write_wait = true;
/* Default case: assume support on newer ASICs. */
1167 adev->gfx.me_fw_write_wait = true;
1168 adev->gfx.mec_fw_write_wait = true;
1173 struct amdgpu_gfxoff_quirk {
/* PCI (vendor, device, subsys vendor, subsys device, revision) tuples of
 * boards where GFXOFF is known broken; matched in
 * gfx_v9_0_should_disable_gfxoff(). The list is zero-terminated by an
 * entry with chip_device == 0.
 */
1181 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1182 /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1183 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1184 /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1185 { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1186 /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1187 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
/*
 * gfx_v9_0_should_disable_gfxoff - check the GFXOFF quirk table
 * @pdev: PCI device of the GPU
 *
 * Walks amdgpu_gfxoff_quirk_list and reports whether this exact board
 * (vendor/device/subsystem/revision match) is quirked to keep GFXOFF off.
 */
1191 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1193 const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
/* Table is terminated by an entry with chip_device == 0. */
1195 while (p && p->chip_device != 0) {
1196 if (pdev->vendor == p->chip_vendor &&
1197 pdev->device == p->chip_device &&
1198 pdev->subsystem_vendor == p->subsys_vendor &&
1199 pdev->subsystem_device == p->subsys_device &&
1200 pdev->revision == p->revision) {
/* True when the Raven SMU firmware is a "kicker" build (version >=
 * 0x41e2b), which gfx_v9_0_check_if_need_gfxoff() treats as GFXOFF-safe.
 */
1208 static bool is_raven_kicker(struct amdgpu_device *adev)
1210 if (adev->pm.fw_version >= 0x41e2b)
/*
 * gfx_v9_0_check_if_need_gfxoff - decide whether GFXOFF may stay enabled
 * @adev: amdgpu device
 *
 * Clears PP_GFXOFF_MASK for quirked boards and for Raven parts whose
 * RLC/SMU firmware is too old; when GFXOFF survives, enables the GFX
 * powergating flags that depend on it.
 */
1216 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1218 if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1219 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1221 switch (adev->asic_type) {
/* Raven (not Raven2/Picasso): require kicker SMU or RLC >= 531 with
 * feature version >= 1 and an RLC v2.1 image, else disable GFXOFF.
 */
1227 if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) &&
1228 ((!is_raven_kicker(adev) &&
1229 adev->gfx.rlc_fw_version < 531) ||
1230 (adev->gfx.rlc_feature_version < 1) ||
1231 !adev->gfx.rlc.is_rlc_v2_1))
1232 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1234 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1235 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1237 AMD_PG_SUPPORT_RLC_SMU_HS;
1240 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1241 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1243 AMD_PG_SUPPORT_RLC_SMU_HS;
/*
 * gfx_v9_0_init_cp_gfx_microcode - load PFP/ME/CE graphics CP firmware
 * @adev:      amdgpu device
 * @chip_name: ASIC name used to build the firmware file names
 *
 * Requests and validates the three graphics CP images, caches their
 * ucode/feature versions, and — for PSP front-door loading — registers
 * each image in adev->firmware.ucode[] and accounts its size. On any
 * failure all three images are released and an error is returned.
 * (Firmware names are elided by the deblobbing script in this tree.)
 */
1250 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1251 const char *chip_name)
1255 struct amdgpu_firmware_info *info = NULL;
1256 const struct common_firmware_header *header = NULL;
1257 const struct gfx_firmware_header_v1_0 *cp_hdr;
1259 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1260 err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1263 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1266 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1267 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1268 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1270 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1271 err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1274 err = amdgpu_ucode_validate(adev->gfx.me_fw);
1277 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1278 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1279 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1281 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1282 err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1285 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1288 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1289 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1290 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* PSP loading: register each image so PSP can DMA it to the CP. */
1292 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1293 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1294 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1295 info->fw = adev->gfx.pfp_fw;
1296 header = (const struct common_firmware_header *)info->fw->data;
1297 adev->firmware.fw_size +=
1298 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1300 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1301 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1302 info->fw = adev->gfx.me_fw;
1303 header = (const struct common_firmware_header *)info->fw->data;
1304 adev->firmware.fw_size +=
1305 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1307 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1308 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1309 info->fw = adev->gfx.ce_fw;
1310 header = (const struct common_firmware_header *)info->fw->data;
1311 adev->firmware.fw_size +=
1312 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
/* Error path: drop whatever was loaded before the failure. */
1318 "gfx9: Failed to load firmware \"%s\"\n",
1320 release_firmware(adev->gfx.pfp_fw);
1321 adev->gfx.pfp_fw = NULL;
1322 release_firmware(adev->gfx.me_fw);
1323 adev->gfx.me_fw = NULL;
1324 release_firmware(adev->gfx.ce_fw);
1325 adev->gfx.ce_fw = NULL;
/*
 * gfx_v9_0_init_rlc_microcode - load and parse the RLC firmware image
 * @adev:      amdgpu device
 * @chip_name: ASIC name used to build the firmware file name
 *
 * Picks a chip/revision-specific RLC image (Picasso AM4/Mobile steppings
 * and new-SMU Raven get dedicated builds), validates it, copies the
 * register list-format and restore tables out of the blob (byteswapping
 * to CPU order), parses the v2.1 extension when present, and registers
 * the RLC ucode (plus the three save/restore lists) for PSP loading.
 * Returns 0 on success; on failure the firmware is released.
 */
1330 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1331 const char *chip_name)
1335 struct amdgpu_firmware_info *info = NULL;
1336 const struct common_firmware_header *header = NULL;
1337 const struct rlc_firmware_header_v2_0 *rlc_hdr;
1338 unsigned int *tmp = NULL;
1340 uint16_t version_major;
1341 uint16_t version_minor;
1342 uint32_t smu_version;
/* Picasso AM4 (0xC8-0xCF) and Mobile (0xD8-0xDF) steppings need their
 * own RLC build.
 */
1345 if (!strcmp(chip_name, "picasso") &&
1346 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1347 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1348 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1349 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1350 (smu_version >= 0x41e2b))
1352 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1354 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1356 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1357 err = reject_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1360 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1361 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1363 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1364 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
/* v2.1 headers carry the extended save/restore lists parsed below. */
1365 if (version_major == 2 && version_minor == 1)
1366 adev->gfx.rlc.is_rlc_v2_1 = true;
1368 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1369 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1370 adev->gfx.rlc.save_and_restore_offset =
1371 le32_to_cpu(rlc_hdr->save_and_restore_offset);
1372 adev->gfx.rlc.clear_state_descriptor_offset =
1373 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1374 adev->gfx.rlc.avail_scratch_ram_locations =
1375 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1376 adev->gfx.rlc.reg_restore_list_size =
1377 le32_to_cpu(rlc_hdr->reg_restore_list_size);
1378 adev->gfx.rlc.reg_list_format_start =
1379 le32_to_cpu(rlc_hdr->reg_list_format_start);
1380 adev->gfx.rlc.reg_list_format_separate_start =
1381 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1382 adev->gfx.rlc.starting_offsets_start =
1383 le32_to_cpu(rlc_hdr->starting_offsets_start);
1384 adev->gfx.rlc.reg_list_format_size_bytes =
1385 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1386 adev->gfx.rlc.reg_list_size_bytes =
1387 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
/* Single allocation holds the format list followed by the restore list;
 * freed in gfx_v9_0_free_microcode().
 */
1388 adev->gfx.rlc.register_list_format =
1389 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1390 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1391 if (!adev->gfx.rlc.register_list_format) {
1396 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1397 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1398 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1399 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1401 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1403 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1404 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1405 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1406 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1408 if (adev->gfx.rlc.is_rlc_v2_1)
1409 gfx_v9_0_init_rlc_ext_microcode(adev);
1411 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1412 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1413 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1414 info->fw = adev->gfx.rlc_fw;
1415 header = (const struct common_firmware_header *)info->fw->data;
1416 adev->firmware.fw_size +=
1417 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
/* Register the three v2.1 save/restore lists only when all are present. */
1419 if (adev->gfx.rlc.is_rlc_v2_1 &&
1420 adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1421 adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1422 adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1423 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1424 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1425 info->fw = adev->gfx.rlc_fw;
1426 adev->firmware.fw_size +=
1427 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1429 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1430 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1431 info->fw = adev->gfx.rlc_fw;
1432 adev->firmware.fw_size +=
1433 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1435 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1436 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1437 info->fw = adev->gfx.rlc_fw;
1438 adev->firmware.fw_size +=
1439 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1446 "gfx9: Failed to load firmware \"%s\"\n",
1448 release_firmware(adev->gfx.rlc_fw);
1449 adev->gfx.rlc_fw = NULL;
/*
 * gfx_v9_0_init_cp_compute_microcode - load MEC1/MEC2 compute CP firmware
 * @adev:      amdgpu device
 * @chip_name: ASIC name used to build the firmware file names
 *
 * MEC1 is mandatory; MEC2 is optional (mec2_fw stays NULL when absent).
 * For PSP loading the images are registered with the MEC jump table split
 * out as separate ucode entries; the MEC2 JT is skipped on Arcturus and
 * Renoir. Also runs the gfxoff/fw-write-wait checks once all firmware
 * versions are known. Returns 0 on success.
 */
1454 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1455 const char *chip_name)
1459 struct amdgpu_firmware_info *info = NULL;
1460 const struct common_firmware_header *header = NULL;
1461 const struct gfx_firmware_header_v1_0 *cp_hdr;
1463 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1464 err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1467 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1470 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1471 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1472 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* MEC2 is best-effort: failure just leaves mec2_fw NULL. */
1475 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1476 err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1478 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1481 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1482 adev->gfx.mec2_fw->data;
1483 adev->gfx.mec2_fw_version =
1484 le32_to_cpu(cp_hdr->header.ucode_version);
1485 adev->gfx.mec2_feature_version =
1486 le32_to_cpu(cp_hdr->ucode_feature_version);
1489 adev->gfx.mec2_fw = NULL;
1492 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1493 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1494 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1495 info->fw = adev->gfx.mec_fw;
1496 header = (const struct common_firmware_header *)info->fw->data;
1497 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
/* Ucode size excludes the jump table, registered separately below. */
1498 adev->firmware.fw_size +=
1499 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1501 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1502 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1503 info->fw = adev->gfx.mec_fw;
1504 adev->firmware.fw_size +=
1505 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1507 if (adev->gfx.mec2_fw) {
1508 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1509 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1510 info->fw = adev->gfx.mec2_fw;
1511 header = (const struct common_firmware_header *)info->fw->data;
1512 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1513 adev->firmware.fw_size +=
1514 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1516 /* TODO: Determine if MEC2 JT FW loading can be removed
1517 for all GFX V9 asic and above */
1518 if (adev->asic_type != CHIP_ARCTURUS &&
1519 adev->asic_type != CHIP_RENOIR) {
1520 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1521 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1522 info->fw = adev->gfx.mec2_fw;
1523 adev->firmware.fw_size +=
1524 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
/* All firmware versions known now: evaluate gfxoff and write-wait. */
1531 gfx_v9_0_check_if_need_gfxoff(adev);
1532 gfx_v9_0_check_fw_write_wait(adev);
1535 "gfx9: Failed to load firmware \"%s\"\n",
1537 release_firmware(adev->gfx.mec_fw);
1538 adev->gfx.mec_fw = NULL;
1539 release_firmware(adev->gfx.mec2_fw);
1540 adev->gfx.mec2_fw = NULL;
/*
 * gfx_v9_0_init_microcode - load all GFX firmware for the detected ASIC
 * @adev: amdgpu device
 *
 * Maps asic_type (plus Raven rev/device sniffing) to a chip name, then
 * loads CP-gfx (skipped on Arcturus, which has no CPG), RLC and
 * CP-compute firmware. Returns 0 on success or the first loader error.
 */
1545 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1547 const char *chip_name;
1552 switch (adev->asic_type) {
1554 chip_name = "vega10";
1557 chip_name = "vega12";
1560 chip_name = "vega20";
/* Raven family: rev >= 8 is Raven2, device 0x15d8 is Picasso. */
1563 if (adev->rev_id >= 8)
1564 chip_name = "raven2";
1565 else if (adev->pdev->device == 0x15d8)
1566 chip_name = "picasso";
1568 chip_name = "raven";
1571 chip_name = "arcturus";
1574 chip_name = "renoir";
1580 /* No CPG in Arcturus */
1581 if (adev->asic_type != CHIP_ARCTURUS) {
1582 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1587 r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1591 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
/*
 * gfx_v9_0_get_csb_size - compute the clear-state buffer size in dwords
 * @adev: amdgpu device
 *
 * Mirrors the layout emitted by gfx_v9_0_get_csb_buffer(): preamble,
 * context-control, one SET_CONTEXT_REG packet (2 dwords header + payload)
 * per SECT_CONTEXT extent, and the end-of-clear-state trailer.
 */
1598 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1601 const struct cs_section_def *sect = NULL;
1602 const struct cs_extent_def *ext = NULL;
1604 /* begin clear state */
1606 /* context control state */
1609 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1610 for (ext = sect->section; ext->extent != NULL; ++ext) {
1611 if (sect->id == SECT_CONTEXT)
1612 count += 2 + ext->reg_count;
1618 /* end clear state */
/*
 * gfx_v9_0_get_csb_buffer - fill the clear-state indirect buffer
 * @adev:   amdgpu device (rlc.cs_data must be set; no-op otherwise)
 * @buffer: destination, written as little-endian PM4 dwords
 *
 * Emits PREAMBLE begin, CONTEXT_CONTROL, one SET_CONTEXT_REG packet per
 * SECT_CONTEXT extent of the clear-state table, PREAMBLE end, and a
 * CLEAR_STATE packet. Must stay in sync with gfx_v9_0_get_csb_size().
 */
1626 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1627 volatile u32 *buffer)
1630 const struct cs_section_def *sect = NULL;
1631 const struct cs_extent_def *ext = NULL;
1633 if (adev->gfx.rlc.cs_data == NULL)
1638 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1639 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1641 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1642 buffer[count++] = cpu_to_le32(0x80000000);
1643 buffer[count++] = cpu_to_le32(0x80000000);
1645 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1646 for (ext = sect->section; ext->extent != NULL; ++ext) {
1647 if (sect->id == SECT_CONTEXT) {
1649 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1650 buffer[count++] = cpu_to_le32(ext->reg_index -
1651 PACKET3_SET_CONTEXT_REG_START);
1652 for (i = 0; i < ext->reg_count; i++)
1653 buffer[count++] = cpu_to_le32(ext->extent[i]);
1660 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1661 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1663 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1664 buffer[count++] = cpu_to_le32(0);
/*
 * gfx_v9_0_init_always_on_cu_mask - program the always-on CU masks for PG.
 *
 * Per SE/SH (selected under grbm_idx_mutex), builds a CU bitmap from
 * cu_info->bitmap and writes RLC_PG_ALWAYS_ON_CU_MASK (first
 * pg_always_on_cu_num = 2 CUs) and RLC_LB_ALWAYS_ACTIVE_CU_MASK.
 * The always-on CU count is 4 on APUs, 8 on Vega12, 12 otherwise.
 * Also records the result in cu_info->ao_cu_bitmap.
 */
1667 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1669 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1670 uint32_t pg_always_on_cu_num = 2;
1671 uint32_t always_on_cu_num;
1673 uint32_t mask, cu_bitmap, counter;
1675 if (adev->flags & AMD_IS_APU)
1676 always_on_cu_num = 4;
1677 else if (adev->asic_type == CHIP_VEGA12)
1678 always_on_cu_num = 8;
1680 always_on_cu_num = 12;
1682 mutex_lock(&adev->grbm_idx_mutex);
1683 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1684 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1688 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1690 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1691 if (cu_info->bitmap[i][j] & mask) {
1692 if (counter == pg_always_on_cu_num)
1693 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1694 if (counter < always_on_cu_num)
1703 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1704 cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
/* restore broadcast mode before releasing the index mutex */
1707 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1708 mutex_unlock(&adev->grbm_idx_mutex);
/*
 * gfx_v9_0_init_lbpw - configure RLC load-balancing / per-CU power gating
 * (LBPW) thresholds and counters for Raven-class parts, then program the
 * always-on CU masks.  Register values are hardware-tuned constants; see
 * the per-write comments below for the intended composite values.
 */
1711 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1715 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1716 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1717 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1718 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1719 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1721 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1722 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1724 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1725 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1727 mutex_lock(&adev->grbm_idx_mutex);
1728 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1729 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1730 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1732 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1733 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1734 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1735 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1736 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1738 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1739 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1742 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1745 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1746 * programmed in gfx_v9_0_init_always_on_cu_mask()
1749 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1750 * but used for RLC_LB_CNTL configuration */
1751 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1752 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1753 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1754 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1755 mutex_unlock(&adev->grbm_idx_mutex);
1757 gfx_v9_0_init_always_on_cu_mask(adev);
/*
 * gfx_v9_4_init_lbpw - LBPW setup for the gfx v9.4 variant.  Same flow as
 * gfx_v9_0_init_lbpw() but with different threshold constants
 * (THR_CONFIG_2/4, CNTR_MAX) and a 12-CU always-on mask.
 */
1760 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1764 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1765 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1766 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1767 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1768 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1770 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1771 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
/* NOTE(review): comment says 0x0500 but the value written is 0x0800 —
 * the write is presumably authoritative; confirm against register spec. */
1773 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1774 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1776 mutex_lock(&adev->grbm_idx_mutex);
1777 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1778 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1779 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1781 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1782 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1783 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1784 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1785 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1787 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1788 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1791 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1794 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1795 * programmed in gfx_v9_0_init_always_on_cu_mask()
1798 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1799 * but used for RLC_LB_CNTL configuration */
1800 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1801 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1802 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1803 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1804 mutex_unlock(&adev->grbm_idx_mutex);
1806 gfx_v9_0_init_always_on_cu_mask(adev);
/* gfx_v9_0_enable_lbpw - toggle the RLC load-balance enable bit. */
1809 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1811 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
/* gfx_v9_0_cp_jump_table_num - number of CP jump-table entries for this
 * generation (body elided in this excerpt). */
1814 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
/*
 * gfx_v9_0_rlc_init - allocate RLC objects and apply per-ASIC RLC setup.
 *
 * Attaches the static gfx9 clear-state data, allocates the clear-state
 * buffer (CSB), allocates the CP table for Raven/Renoir (jump table + 64KB
 * GDS backup), runs the LBPW init matching the ASIC, and seeds the SPM
 * VMID with 0xf when the callback is present.
 */
1819 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1821 const struct cs_section_def *cs_data;
1824 adev->gfx.rlc.cs_data = gfx9_cs_data;
1826 cs_data = adev->gfx.rlc.cs_data;
1829 /* init clear state block */
1830 r = amdgpu_gfx_rlc_init_csb(adev);
1835 if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1836 /* TODO: double check the cp_table_size for RV */
1837 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1838 r = amdgpu_gfx_rlc_init_cpt(adev);
1843 switch (adev->asic_type) {
1845 gfx_v9_0_init_lbpw(adev);
1848 gfx_v9_4_init_lbpw(adev);
1854 /* init spm vmid with 0xf */
1855 if (adev->gfx.rlc.funcs->update_spm_vmid)
1856 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
/* gfx_v9_0_mec_fini - free the MEC HPD EOP and firmware BOs allocated by
 * gfx_v9_0_mec_init().  amdgpu_bo_free_kernel() is NULL-safe, so this is
 * also used for error cleanup. */
1861 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1863 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1864 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
/*
 * gfx_v9_0_mec_init - allocate MEC (compute microengine) buffers.
 *
 * Claims the compute queues, creates a VRAM BO for the HPD EOP area
 * (GFX9_MEC_HPD_SIZE per compute ring) and zeroes it, then creates a GTT
 * BO for the MEC firmware image and copies the ucode payload into it.
 * On any allocation failure, gfx_v9_0_mec_fini() tears down what was
 * created.  Both BOs are unmapped/unreserved after being filled.
 */
1867 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1871 const __le32 *fw_data;
1874 size_t mec_hpd_size;
1876 const struct gfx_firmware_header_v1_0 *mec_hdr;
1878 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1880 /* take ownership of the relevant compute queues */
1881 amdgpu_gfx_compute_queue_acquire(adev);
1882 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1884 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1885 AMDGPU_GEM_DOMAIN_VRAM,
1886 &adev->gfx.mec.hpd_eop_obj,
1887 &adev->gfx.mec.hpd_eop_gpu_addr,
1890 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1891 gfx_v9_0_mec_fini(adev);
1895 memset(hpd, 0, mec_hpd_size);
1897 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1898 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
/* locate the ucode payload inside the loaded MEC firmware blob */
1900 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1902 fw_data = (const __le32 *)
1903 (adev->gfx.mec_fw->data +
1904 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1905 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1907 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1908 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1909 &adev->gfx.mec.mec_fw_obj,
1910 &adev->gfx.mec.mec_fw_gpu_addr,
1913 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1914 gfx_v9_0_mec_fini(adev);
1918 memcpy(fw, fw_data, fw_size);
1920 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1921 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
/* wave_read_ind - read one SQ wave register via the indexed SQ_IND
 * interface: program SQ_IND_INDEX with wave/simd/address (FORCE_READ set),
 * then read the value back from SQ_IND_DATA. */
1926 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1928 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1929 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1930 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1931 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1932 (SQ_IND_INDEX__FORCE_READ_MASK));
1933 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
/* wave_read_regs - burst-read @num consecutive wave registers starting at
 * @regno into @out, using SQ_IND_INDEX auto-increment so SQ_IND_DATA can
 * be read repeatedly without reprogramming the index. */
1936 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1937 uint32_t wave, uint32_t thread,
1938 uint32_t regno, uint32_t num, uint32_t *out)
1940 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1941 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1942 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1943 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1944 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1945 (SQ_IND_INDEX__FORCE_READ_MASK) |
1946 (SQ_IND_INDEX__AUTO_INCR_MASK));
1948 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
/*
 * gfx_v9_0_read_wave_data - snapshot the core state registers of one wave
 * (STATUS, PC, EXEC, HW_ID, current instruction, GPR/LDS allocation, trap
 * status, IB status/debug, M0) into @dst for debugfs wave dumps.
 * *no_fields is advanced by the number of dwords written; the leading "1"
 * tags this as a type-1 wave data record.
 */
1951 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1953 /* type 1 wave data */
1954 dst[(*no_fields)++] = 1;
1955 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1956 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1957 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1958 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1959 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1960 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1961 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1962 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1963 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1964 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1965 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1966 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1967 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1968 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
/* gfx_v9_0_read_wave_sgprs - dump @size SGPRs of a wave starting at
 * @start into @dst (thread id 0; SGPRs are per-wave, not per-thread). */
1971 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1972 uint32_t wave, uint32_t start,
1973 uint32_t size, uint32_t *dst)
1976 adev, simd, wave, 0,
1977 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
/* gfx_v9_0_read_wave_vgprs - dump @size VGPRs of one @thread in a wave
 * starting at @start into dst. */
1980 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1981 uint32_t wave, uint32_t thread,
1982 uint32_t start, uint32_t size,
1986 adev, simd, wave, thread,
1987 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
/* gfx_v9_0_select_me_pipe_q - route register access to a specific
 * ME/pipe/queue/VMID via GRBM select. */
1990 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1991 u32 me, u32 pipe, u32 q, u32 vm)
1993 soc15_grbm_select(adev, me, pipe, q, vm);
/* GFX callback table for gfx v9.0-class ASICs; RAS hooks use the
 * gfx_v9_0_* error inject/query/reset implementations. */
1996 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1997 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1998 .select_se_sh = &gfx_v9_0_select_se_sh,
1999 .read_wave_data = &gfx_v9_0_read_wave_data,
2000 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2001 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2002 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2003 .ras_error_inject = &gfx_v9_0_ras_error_inject,
2004 .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2005 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
/* GFX callback table for gfx v9.4 (Arcturus-class): identical to
 * gfx_v9_0_gfx_funcs except the RAS hooks are the gfx_v9_4_* variants. */
2008 static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
2009 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2010 .select_se_sh = &gfx_v9_0_select_se_sh,
2011 .read_wave_data = &gfx_v9_0_read_wave_data,
2012 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2013 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2014 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2015 .ras_error_inject = &gfx_v9_4_ras_error_inject,
2016 .query_ras_error_count = &gfx_v9_4_query_ras_error_count,
2017 .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
/*
 * gfx_v9_0_gpu_early_init - set per-ASIC gfx config and decode
 * GB_ADDR_CONFIG.
 *
 * Installs the gfx funcs table (v9.4 table for the ASIC handled at the
 * Arcturus-position case), fills the SC fifo sizes / max contexts, and
 * determines gb_addr_config either from a golden constant or by reading
 * the register and patching selected fields.  Afterwards the individual
 * GB_ADDR_CONFIG fields (pipes, banks, compress frags, RBs/SE, SEs,
 * pipe interleave) are unpacked into adev->gfx.config.
 */
2020 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2025 adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2027 switch (adev->asic_type) {
2029 adev->gfx.config.max_hw_contexts = 8;
2030 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2031 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2032 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2033 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2034 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2037 adev->gfx.config.max_hw_contexts = 8;
2038 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2039 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2040 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2041 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2042 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2043 DRM_INFO("fix gfx.config for vega12\n");
2046 adev->gfx.config.max_hw_contexts = 8;
2047 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2048 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2049 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2050 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
/* derive addr config from hardware, overriding selected fields */
2051 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2052 gb_addr_config &= ~0xf3e777ff;
2053 gb_addr_config |= 0x22014042;
2054 /* check vbios table if gpu info is not available */
2055 err = amdgpu_atomfirmware_get_gfx_info(adev);
2060 adev->gfx.config.max_hw_contexts = 8;
2061 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2062 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2063 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2064 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2065 if (adev->rev_id >= 8)
2066 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2068 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
/* this case switches to the v9.4 funcs table (Arcturus-class) */
2071 adev->gfx.funcs = &gfx_v9_4_gfx_funcs;
2072 adev->gfx.config.max_hw_contexts = 8;
2073 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2074 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2075 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2076 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2077 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2078 gb_addr_config &= ~0xf3e777ff;
2079 gb_addr_config |= 0x22014042;
2082 adev->gfx.config.max_hw_contexts = 8;
2083 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2084 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2085 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2086 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2087 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2088 gb_addr_config &= ~0xf3e777ff;
2089 gb_addr_config |= 0x22010042;
2096 adev->gfx.config.gb_addr_config = gb_addr_config;
/* unpack GB_ADDR_CONFIG bitfields into discrete config values */
2098 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2100 adev->gfx.config.gb_addr_config,
2104 adev->gfx.config.max_tile_pipes =
2105 adev->gfx.config.gb_addr_config_fields.num_pipes;
2107 adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2109 adev->gfx.config.gb_addr_config,
2112 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2114 adev->gfx.config.gb_addr_config,
2116 MAX_COMPRESSED_FRAGS);
2117 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2119 adev->gfx.config.gb_addr_config,
2122 adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2124 adev->gfx.config.gb_addr_config,
2126 NUM_SHADER_ENGINES);
2127 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2129 adev->gfx.config.gb_addr_config,
2131 PIPE_INTERLEAVE_SIZE));
/*
 * gfx_v9_0_compute_ring_init - initialize one compute ring.
 *
 * Configures doorbell (two doorbell slots per ring), points the ring's
 * EOP area into the shared MEC HPD buffer, names it "comp_me.pipe.queue",
 * derives the EOP interrupt source from me/pipe, and registers the ring.
 */
2136 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2137 int mec, int pipe, int queue)
2141 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2143 ring = &adev->gfx.compute_ring[ring_id];
2148 ring->queue = queue;
2150 ring->ring_obj = NULL;
2151 ring->use_doorbell = true;
2152 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2153 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2154 + (ring_id * GFX9_MEC_HPD_SIZE);
2155 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2157 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2158 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2161 /* type-2 packets are deprecated on MEC, use type-3 instead */
2162 r = amdgpu_ring_init(adev, ring, 1024,
2163 &adev->gfx.eop_irq, irq_type);
/*
 * gfx_v9_0_sw_init - IP-block software init for GFX v9.
 *
 * Sets MEC topology (2 MECs on the listed ASICs, else 1; 4 pipes x 8
 * queues each), registers EOP / privileged-reg / privileged-inst / ECC /
 * FUE interrupt sources, loads microcode, initializes RLC and MEC BOs,
 * creates the gfx ring(s) and compute rings (allocated horizontally
 * across pipes), sets up KIQ and MQDs, and runs gpu_early_init.
 */
2171 static int gfx_v9_0_sw_init(void *handle)
2173 int i, j, k, r, ring_id;
2174 struct amdgpu_ring *ring;
2175 struct amdgpu_kiq *kiq;
2176 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2178 switch (adev->asic_type) {
2185 adev->gfx.mec.num_mec = 2;
2188 adev->gfx.mec.num_mec = 1;
2192 adev->gfx.mec.num_pipe_per_mec = 4;
2193 adev->gfx.mec.num_queue_per_pipe = 8;
2196 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2200 /* Privileged reg */
2201 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2202 &adev->gfx.priv_reg_irq);
2206 /* Privileged inst */
2207 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2208 &adev->gfx.priv_inst_irq);
/* both ECC and FUE errors funnel into the same cp_ecc_error_irq source */
2213 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2214 &adev->gfx.cp_ecc_error_irq);
2219 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2220 &adev->gfx.cp_ecc_error_irq);
2224 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2226 gfx_v9_0_scratch_init(adev);
2228 r = gfx_v9_0_init_microcode(adev);
2230 DRM_ERROR("Failed to load gfx firmware!\n");
2234 r = adev->gfx.rlc.funcs->init(adev);
2236 DRM_ERROR("Failed to init rlc BOs!\n");
2240 r = gfx_v9_0_mec_init(adev);
2242 DRM_ERROR("Failed to init MEC BOs!\n");
2246 /* set up the gfx ring */
2247 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2248 ring = &adev->gfx.gfx_ring[i];
2249 ring->ring_obj = NULL;
2251 sprintf(ring->name, "gfx");
2253 sprintf(ring->name, "gfx_%d", i);
2254 ring->use_doorbell = true;
2255 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2256 r = amdgpu_ring_init(adev, ring, 1024,
2257 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2262 /* set up the compute queues - allocate horizontally across pipes */
2264 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2265 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2266 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2267 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2270 r = gfx_v9_0_compute_ring_init(adev,
2281 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2283 DRM_ERROR("Failed to init KIQ BOs!\n");
2287 kiq = &adev->gfx.kiq;
2288 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2292 /* create MQD for all compute queues as wel as KIQ for SRIOV case */
2293 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2297 adev->gfx.ce_ram_size = 0x8000;
2299 r = gfx_v9_0_gpu_early_init(adev);
/*
 * gfx_v9_0_sw_fini - tear down everything allocated in gfx_v9_0_sw_init,
 * in reverse order: RAS, rings, MQDs, KIQ, MEC BOs, clear-state BO, the
 * Raven/Renoir CP table, and the loaded firmware images.
 */
2307 static int gfx_v9_0_sw_fini(void *handle)
2310 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2312 amdgpu_gfx_ras_fini(adev);
2314 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2315 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2316 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2317 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2319 amdgpu_gfx_mqd_sw_fini(adev);
2320 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2321 amdgpu_gfx_kiq_fini(adev);
2323 gfx_v9_0_mec_fini(adev);
2324 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
/* CP table exists only on the ASICs that allocated it in rlc_init */
2325 if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2326 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2327 &adev->gfx.rlc.cp_table_gpu_addr,
2328 (void **)&adev->gfx.rlc.cp_table_ptr);
2330 gfx_v9_0_free_microcode(adev);
/* gfx_v9_0_tiling_mode_table_init - tiling table setup hook (body elided
 * in this excerpt; on gfx9 tiling is largely handled by GB_ADDR_CONFIG). */
2336 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
/*
 * gfx_v9_0_select_se_sh - program GRBM_GFX_INDEX to target a specific
 * shader engine / shader array / instance, or broadcast to all when the
 * corresponding argument is 0xffffffff.  Callers hold grbm_idx_mutex.
 */
2341 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2345 if (instance == 0xffffffff)
2346 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2348 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2350 if (se_num == 0xffffffff)
2351 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2353 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2355 if (sh_num == 0xffffffff)
2356 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2358 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2360 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
/*
 * gfx_v9_0_get_rb_active_bitmap - bitmap of active render backends for the
 * currently-selected SE/SH: OR the hw and user disable registers, mask and
 * shift the BACKEND_DISABLE field, and invert against the per-SH RB mask.
 */
2363 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2367 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2368 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2370 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2371 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2373 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2374 adev->gfx.config.max_sh_per_se);
2376 return (~data) & mask;
/*
 * gfx_v9_0_setup_rb - collect the active render-backend bitmap across all
 * SE/SH (under grbm_idx_mutex), packing each SH's bits at its position in
 * the global mask, then store the enable mask and RB count in gfx.config.
 */
2379 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2384 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2385 adev->gfx.config.max_sh_per_se;
2387 mutex_lock(&adev->grbm_idx_mutex);
2388 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2389 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2390 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2391 data = gfx_v9_0_get_rb_active_bitmap(adev);
2392 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2393 rb_bitmap_width_per_sh);
2396 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2397 mutex_unlock(&adev->grbm_idx_mutex);
2399 adev->gfx.config.backend_enable_mask = active_rbs;
2400 adev->gfx.config.num_rbs = hweight32(active_rbs);
/* Compute VMIDs occupy the range [FIRST_COMPUTE_VMID, LAST_COMPUTE_VMID). */
2403 #define DEFAULT_SH_MEM_BASES (0x6000)
2404 #define FIRST_COMPUTE_VMID (8)
2405 #define LAST_COMPUTE_VMID (16)
/*
 * gfx_v9_0_init_compute_vmid - set SH_MEM apertures for compute VMIDs 8-15
 * (under srbm_mutex, selecting each VMID via GRBM), then clear their GDS,
 * GWS and OA allocations — the firmware enables those for target VMIDs.
 */
2406 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2409 uint32_t sh_mem_config;
2410 uint32_t sh_mem_bases;
2413 * Configure apertures:
2414 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
2415 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
2416 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
2418 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2420 sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2421 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2422 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2424 mutex_lock(&adev->srbm_mutex);
2425 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2426 soc15_grbm_select(adev, 0, 0, 0, i);
2427 /* CP and shaders */
2428 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2429 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2431 soc15_grbm_select(adev, 0, 0, 0, 0);
2432 mutex_unlock(&adev->srbm_mutex);
2434 /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2435 acccess. These should be enabled by FW for target VMIDs. */
2436 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2437 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2438 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2439 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2440 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
/* gfx_v9_0_init_gds_vmid - zero GDS/GWS/OA allocations for VMIDs 1-15;
 * VMID0 is left untouched so HWS firmware can save/restore entries. */
2444 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2449 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2450 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2451 * the driver can enable them for graphics. VMID0 should maintain
2452 * access so that HWS firmware can save/restore entries.
2454 for (vmid = 1; vmid < 16; vmid++) {
2455 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2456 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2457 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2458 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
/* gfx_v9_0_init_sq_config - per-ASIC SQ_CONFIG tweak: for the case shown,
 * set DISABLE_BARRIER_WAITCNT in SQ_CONFIG (read-modify-write). */
2462 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2466 switch (adev->asic_type) {
2468 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2469 tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2470 DISABLE_BARRIER_WAITCNT, 1);
2471 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
/*
 * gfx_v9_0_constants_init - one-time golden/constant register setup.
 *
 * Sets GRBM read timeout, runs tiling/RB/CU discovery, then programs
 * SH_MEM_CONFIG and SH_MEM_BASES for every GFXHUB VMID under srbm_mutex
 * (the two branches differ: one leaves bases at 0, the other derives the
 * private/shared aperture bases from gmc).  Finishes with compute-VMID,
 * GDS-VMID and SQ config init.
 */
2478 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2483 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2485 gfx_v9_0_tiling_mode_table_init(adev);
2487 gfx_v9_0_setup_rb(adev);
2488 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2489 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2491 /* XXX SH_MEM regs */
2492 /* where to put LDS, scratch, GPUVM in FSA64 space */
2493 mutex_lock(&adev->srbm_mutex);
2494 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2495 soc15_grbm_select(adev, 0, 0, 0, i);
2496 /* CP and shaders */
2498 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2499 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2500 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2502 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2503 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2505 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2506 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2507 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2509 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
/* aperture bases come from the top 16 bits of the gmc aperture addrs */
2510 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2511 (adev->gmc.private_aperture_start >> 48));
2512 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2513 (adev->gmc.shared_aperture_start >> 48));
2514 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2517 soc15_grbm_select(adev, 0, 0, 0, 0);
2519 mutex_unlock(&adev->srbm_mutex);
2521 gfx_v9_0_init_compute_vmid(adev);
2522 gfx_v9_0_init_gds_vmid(adev);
2523 gfx_v9_0_init_sq_config(adev);
/*
 * gfx_v9_0_wait_for_rlc_serdes - poll until RLC serdes masters go idle.
 *
 * First polls RLC_SERDES_CU_MASTER_BUSY per SE/SH up to usec_timeout,
 * logging (and releasing the index mutex) on timeout; then polls the
 * non-CU master busy bits (SE/GC/TC0/TC1) in broadcast mode.
 */
2526 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2531 mutex_lock(&adev->grbm_idx_mutex);
2532 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2533 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2534 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2535 for (k = 0; k < adev->usec_timeout; k++) {
2536 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2540 if (k == adev->usec_timeout) {
2541 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2542 0xffffffff, 0xffffffff);
2543 mutex_unlock(&adev->grbm_idx_mutex);
2544 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2550 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2551 mutex_unlock(&adev->grbm_idx_mutex);
2553 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2554 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2555 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2556 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2557 for (k = 0; k < adev->usec_timeout; k++) {
2558 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/* gfx_v9_0_enable_gui_idle_interrupt - gate the ring-0 context busy/empty,
 * compute busy and gfx-idle interrupt enables on @enable. */
2564 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2567 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2569 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2570 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2571 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2572 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2574 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
/* gfx_v9_0_init_csb - fill the CSB via the rlc get_csb_buffer callback and
 * point RLC_CSIB_ADDR_HI/LO + LENGTH at the clear-state buffer (the low
 * address is masked to 256-byte alignment). */
2577 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2579 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2581 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2582 adev->gfx.rlc.clear_state_gpu_addr >> 32);
2583 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2584 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2585 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2586 adev->gfx.rlc.clear_state_size);
/*
 * gfx_v9_1_parse_ind_reg_list - scan the indirect part of the RLC register
 * list format, recording each indirect run's start offset into
 * @indirect_start_offsets and collecting each distinct indirect register
 * into @unique_indirect_regs (first empty slot, capped at
 * unique_indirect_reg_count — overflow is a BUG_ON).  Entries are
 * (reg, value) pairs terminated by an 0xFFFFFFFF sentinel.
 */
2589 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2590 int indirect_offset,
2592 int *unique_indirect_regs,
2593 int unique_indirect_reg_count,
2594 int *indirect_start_offsets,
2595 int *indirect_start_offsets_count,
2596 int max_start_offsets_count)
2600 for (; indirect_offset < list_size; indirect_offset++) {
2601 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count)
2602 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2603 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2605 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2606 indirect_offset += 2;
2608 /* look for the matching indice */
2609 for (idx = 0; idx < unique_indirect_regs[idx] ==
2611 register_list_format[indirect_offset] ||
2612 !unique_indirect_regs[idx])
2616 BUG_ON(idx >= unique_indirect_reg_count);
2618 if (!unique_indirect_regs[idx])
2619 unique_indirect_regs[idx] = register_list_format[indirect_offset];
/*
 * gfx_v9_1_init_rlc_save_restore_list - program the RLC save/restore
 * machine from the firmware-provided register lists.
 *
 * Works on a kmemdup'd copy of register_list_format.  Steps: parse the
 * indirect portion (unique regs + start offsets), enable SRM ARAM auto-
 * increment, upload the register_restore table to ARAM offset 0, write the
 * direct then indirect format lists into GPM scratch (indirect registers
 * are replaced by their index in unique_indirect_regs; unknown regs are a
 * BUG_ON), store the list size (in register pairs) and the per-run start
 * offsets, and finally program the SRM index CNTL addr/data pairs for
 * each unique indirect register.
 */
2626 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2628 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2629 int unique_indirect_reg_count = 0;
2631 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2632 int indirect_start_offsets_count = 0;
2638 u32 *register_list_format =
2639 kmemdup(adev->gfx.rlc.register_list_format,
2640 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2641 if (!register_list_format)
2644 /* setup unique_indirect_regs array and indirect_start_offsets array */
2645 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2646 gfx_v9_1_parse_ind_reg_list(register_list_format,
2647 adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2648 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2649 unique_indirect_regs,
2650 unique_indirect_reg_count,
2651 indirect_start_offsets,
2652 &indirect_start_offsets_count,
2653 ARRAY_SIZE(indirect_start_offsets));
2655 /* enable auto inc in case it is disabled */
2656 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2657 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2658 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2660 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2661 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2662 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2663 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2664 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2665 adev->gfx.rlc.register_restore[i]);
2667 /* load indirect register */
2668 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2669 adev->gfx.rlc.reg_list_format_start);
2671 /* direct register portion */
2672 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2673 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2674 register_list_format[i]);
2676 /* indirect register portion */
2677 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2678 if (register_list_format[i] == 0xFFFFFFFF) {
2679 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2683 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2684 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
/* replace the register offset by its index in unique_indirect_regs */
2686 for (j = 0; j < unique_indirect_reg_count; j++) {
2687 if (register_list_format[i] == unique_indirect_regs[j]) {
2688 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2693 BUG_ON(j >= unique_indirect_reg_count);
2698 /* set save/restore list size */
2699 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2700 list_size = list_size >> 1;
2701 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2702 adev->gfx.rlc.reg_restore_list_size);
2703 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2705 /* write the starting offsets to RLC scratch ram */
2706 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2707 adev->gfx.rlc.starting_offsets_start);
2708 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2709 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2710 indirect_start_offsets[i]);
2712 /* load unique indirect regs*/
2713 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2714 if (unique_indirect_regs[i] != 0) {
2715 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2716 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2717 unique_indirect_regs[i] & 0x3FFFF);
2719 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2720 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2721 unique_indirect_regs[i] >> 20);
2725 kfree(register_list_format);
/* gfx_v9_0_enable_save_restore_machine - turn on the RLC save/restore
 * machine (SRM_ENABLE bit). */
2729 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2731 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
/*
 * Hand control over CGPG to/from GFXIP via PWR_MISC_CNTL_STATUS.
 * On enable: set PWR_GFX_RLC_CGPG_EN and program PWR_GFXOFF_STATUS to 2.
 * On disable: clear PWR_GFX_RLC_CGPG_EN.
 * Each write is skipped when the register value is unchanged.
 */
2734 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2738 	uint32_t default_data = 0;
2740 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2741 	if (enable == true) {
2742 		/* enable GFXIP control over CGPG */
2743 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2744 		if(default_data != data)
2745 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
		/* program PWR_GFXOFF_STATUS field to 2 */
2748 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2749 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2750 		if(default_data != data)
2751 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2753 		/* restore GFXIP control over GCPG */
2754 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2755 		if(default_data != data)
2756 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
/*
 * Program the GFX power-gating timing parameters when any of the
 * GFX_PG/SMG/DMG PG flags are enabled: CP idle poll count, the four
 * RLC PG delays, SERDES command delay, CGCG-active-before-CGPG delay
 * and the GRBM register-save idle threshold; finally give GFXIP
 * control over CGPG.
 */
2760 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2764 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2765 			      AMD_PG_SUPPORT_GFX_SMG |
2766 			      AMD_PG_SUPPORT_GFX_DMG)) {
2767 		/* init IDLE_POLL_COUNT = 60 */
2768 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2769 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2770 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2771 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2773 		/* init RLC PG Delay */
2775 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2776 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2777 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2778 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2779 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2781 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2782 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2783 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2784 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2786 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2787 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2788 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2789 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2791 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2792 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2794 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2795 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2796 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2798 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
/*
 * Toggle SMU clock slow-down on power-up (RLC_PG_CNTL field
 * SMU_CLK_SLOWDOWN_ON_PU_ENABLE); write only when the value changes.
 */
2802 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2806 	uint32_t default_data = 0;
2808 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2809 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2810 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2812 	if (default_data != data)
2813 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
/*
 * Toggle SMU clock slow-down on power-down (RLC_PG_CNTL field
 * SMU_CLK_SLOWDOWN_ON_PD_ENABLE); write only when the value changes.
 */
2816 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2820 	uint32_t default_data = 0;
2822 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2823 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2824 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2826 	if(default_data != data)
2827 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
/*
 * Toggle CP power gating in RLC_PG_CNTL; write only when the value
 * actually changes.
 */
2830 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2834 	uint32_t default_data = 0;
2836 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2837 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2840 	if(default_data != data)
2841 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
/*
 * Toggle coarse-grain GFX power gating (RLC_PG_CNTL field
 * GFX_POWER_GATING_ENABLE); write only when the value changes.
 */
2844 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2847 	uint32_t data, default_data;
2849 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2850 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2851 			     GFX_POWER_GATING_ENABLE,
2853 	if(default_data != data)
2854 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
/*
 * Toggle GFX pipeline power gating (RLC_PG_CNTL field
 * GFX_PIPELINE_PG_ENABLE), then perform a dummy register read to
 * wake up the GFX block.
 */
2857 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2860 	uint32_t data, default_data;
2862 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2863 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2864 			     GFX_PIPELINE_PG_ENABLE,
2866 	if(default_data != data)
2867 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2870 	/* read any GFX register to wake up GFX */
2871 	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
/*
 * Toggle static per-CU power gating (RLC_PG_CNTL field
 * STATIC_PER_CU_PG_ENABLE); write only when the value changes.
 */
2874 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2877 	uint32_t data, default_data;
2879 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2880 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2881 			     STATIC_PER_CU_PG_ENABLE,
2883 	if(default_data != data)
2884 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
/*
 * Toggle dynamic per-CU power gating (RLC_PG_CNTL field
 * DYN_PER_CU_PG_ENABLE); write only when the value changes.
 */
2887 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2890 	uint32_t data, default_data;
2892 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2893 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2894 			     DYN_PER_CU_PG_ENABLE,
2896 	if(default_data != data)
2897 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
/*
 * One-time power-gating init: program the clear-state buffer, set up the
 * RLC save/restore list (RLC v2.1 parts only — needed for gfxoff), and,
 * when any PG feature is enabled, point the RLC at the CP jump table and
 * program the GFX power-gating parameters.
 */
2900 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2902 	gfx_v9_0_init_csb(adev);
2905 	 * Rlc save restore list is workable since v2_1.
2906 	 * And it's needed by gfxoff feature.
2908 	if (adev->gfx.rlc.is_rlc_v2_1) {
2909 		if (adev->asic_type == CHIP_VEGA12 ||
2910 		    (adev->asic_type == CHIP_RAVEN &&
2912 			gfx_v9_1_init_rlc_save_restore_list(adev);
2913 			gfx_v9_0_enable_save_restore_machine(adev);
2916 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2917 			      AMD_PG_SUPPORT_GFX_SMG |
2918 			      AMD_PG_SUPPORT_GFX_DMG |
2920 			      AMD_PG_SUPPORT_GDS |
2921 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
		/* tell the RLC where the CP jump table lives (256-byte aligned) */
2922 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2923 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2924 		gfx_v9_0_init_gfx_power_gating(adev);
/*
 * Stop the RLC: halt the F32 core, mask the GUI idle interrupt and wait
 * for the RLC serdes to quiesce.
 */
2928 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2930 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2931 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2932 	gfx_v9_0_wait_for_rlc_serdes(adev);
/* Pulse the RLC soft reset via GRBM_SOFT_RESET (assert then de-assert). */
2935 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2937 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2939 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
/*
 * Start the RLC F32 core and (on dGPU only) re-enable the GUI idle
 * interrupt.  When built with AMDGPU_RLC_DEBUG_RETRY, detect the debug
 * ucode by its version register and reprogram the GPM timer interval and
 * page-fault-retry gap accordingly.
 */
2943 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2945 #ifdef AMDGPU_RLC_DEBUG_RETRY
2949 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2952 	/* carrizo do enable cp interrupt after cp inited */
2953 	if (!(adev->flags & AMD_IS_APU)) {
2954 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2958 #ifdef AMDGPU_RLC_DEBUG_RETRY
2959 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2960 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2961 	if(rlc_ucode_ver == 0x108) {
2962 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2963 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2964 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2965 		 * default is 0x9C4 to create a 100us interval */
2966 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2967 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2968 		 * to disable the page fault retry interrupts, default is
2970 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
/*
 * Legacy (non-PSP) RLC firmware load: stream the RLCG ucode words into
 * RLC_GPM_UCODE_DATA starting at RLCG_UCODE_LOADING_START_ADDRESS, then
 * write the firmware version back to the address register.
 * Fails when no RLC firmware was fetched.
 */
2975 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2977 	const struct rlc_firmware_header_v2_0 *hdr;
2978 	const __le32 *fw_data;
2979 	unsigned i, fw_size;
2981 	if (!adev->gfx.rlc_fw)
2984 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2985 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2987 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2988 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	/* ucode size is in bytes; registers take dwords */
2989 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2991 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2992 			RLCG_UCODE_LOADING_START_ADDRESS);
2993 	for (i = 0; i < fw_size; i++)
2994 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2995 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
/*
 * Bring the RLC up: under SR-IOV only the CSB needs init; otherwise stop
 * the RLC, disable CGCG/CGLS, init power gating, load the microcode in
 * legacy mode, configure LBPW per ASIC/module parameter and start the RLC.
 */
3000 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3004 	if (amdgpu_sriov_vf(adev)) {
3005 		gfx_v9_0_init_csb(adev);
3009 	adev->gfx.rlc.funcs->stop(adev);
	/* disable CG while (re)programming the RLC */
3012 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3014 	gfx_v9_0_init_pg(adev);
3016 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3017 		/* legacy rlc firmware loading */
3018 		r = gfx_v9_0_rlc_load_microcode(adev);
3023 	switch (adev->asic_type) {
3025 		if (amdgpu_lbpw == 0)
3026 			gfx_v9_0_enable_lbpw(adev, false);
3028 			gfx_v9_0_enable_lbpw(adev, true);
3031 		if (amdgpu_lbpw > 0)
3032 			gfx_v9_0_enable_lbpw(adev, true);
3034 			gfx_v9_0_enable_lbpw(adev, false);
3040 	adev->gfx.rlc.funcs->start(adev);
/*
 * Halt or un-halt the GFX CP micro-engines (ME/PFP/CE).  On disable the
 * gfx ring schedulers are marked not ready before the halt is committed.
 */
3045 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3048 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3050 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3051 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3052 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3054 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3055 			adev->gfx.gfx_ring[i].sched.ready = false;
3057 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
/*
 * Legacy (non-PSP) GFX CP firmware load: halt the CP, then stream the
 * PFP, CE and ME ucode images into their respective UCODE_DATA/RAM_DATA
 * registers and record each firmware version.
 * Fails when any of the three images is missing.
 */
3061 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3063 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3064 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3065 	const struct gfx_firmware_header_v1_0 *me_hdr;
3066 	const __le32 *fw_data;
3067 	unsigned i, fw_size;
3069 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3072 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3073 		adev->gfx.pfp_fw->data;
3074 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3075 		adev->gfx.ce_fw->data;
3076 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3077 		adev->gfx.me_fw->data;
3079 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3080 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3081 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
	/* CP must be halted while its ucode RAMs are rewritten */
3083 	gfx_v9_0_cp_gfx_enable(adev, false);
	/* PFP */
3086 	fw_data = (const __le32 *)
3087 		(adev->gfx.pfp_fw->data +
3088 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3089 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3090 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3091 	for (i = 0; i < fw_size; i++)
3092 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3093 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
	/* CE */
3096 	fw_data = (const __le32 *)
3097 		(adev->gfx.ce_fw->data +
3098 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3099 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3100 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3101 	for (i = 0; i < fw_size; i++)
3102 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3103 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
	/* ME */
3106 	fw_data = (const __le32 *)
3107 		(adev->gfx.me_fw->data +
3108 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3109 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3110 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3111 	for (i = 0; i < fw_size; i++)
3112 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3113 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
/*
 * Start the GFX CP: program context count and device id, un-halt the CP,
 * then submit the clear-state sequence (PREAMBLE begin/end around the
 * gfx9_cs_data context registers), a CLEAR_STATE packet, the CE partition
 * bases and the VGT index type over the first gfx ring.
 */
3118 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3120 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3121 	const struct cs_section_def *sect = NULL;
3122 	const struct cs_extent_def *ext = NULL;
3126 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3127 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3129 	gfx_v9_0_cp_gfx_enable(adev, true);
	/* reserve space for the CSB plus the trailing fixed packets */
3131 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3133 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3137 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3138 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3140 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3141 	amdgpu_ring_write(ring, 0x80000000);
3142 	amdgpu_ring_write(ring, 0x80000000);
3144 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3145 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3146 			if (sect->id == SECT_CONTEXT) {
3147 				amdgpu_ring_write(ring,
3148 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3150 				amdgpu_ring_write(ring,
3151 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3152 				for (i = 0; i < ext->reg_count; i++)
3153 					amdgpu_ring_write(ring, ext->extent[i]);
3158 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3159 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3161 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3162 	amdgpu_ring_write(ring, 0);
3164 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3165 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3166 	amdgpu_ring_write(ring, 0x8000);
3167 	amdgpu_ring_write(ring, 0x8000);
3169 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
3170 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3171 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3172 	amdgpu_ring_write(ring, tmp);
3173 	amdgpu_ring_write(ring, 0);
3175 	amdgpu_ring_commit(ring);
/*
 * Resume the GFX ring: program ring buffer size/pointers, rptr writeback
 * and wptr-poll addresses, ring base, doorbell control and range, then
 * start the CP and mark the ring scheduler ready.
 */
3180 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3182 	struct amdgpu_ring *ring;
3185 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3187 	/* Set the write pointer delay */
3188 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3190 	/* set the RB to use vmid 0 */
3191 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3193 	/* Set ring buffer size */
3194 	ring = &adev->gfx.gfx_ring[0];
3195 	rb_bufsz = order_base_2(ring->ring_size / 8);
3196 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3197 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3199 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3201 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3203 	/* Initialize the ring buffer's write pointers */
3205 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3206 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3208 	/* set the wb address whether it's enabled or not */
3209 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3210 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3211 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3213 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3214 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3215 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3218 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
	/* ring base is 256-byte aligned */
3220 	rb_addr = ring->gpu_addr >> 8;
3221 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3222 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3224 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3225 	if (ring->use_doorbell) {
3226 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3227 				    DOORBELL_OFFSET, ring->doorbell_index);
3228 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3231 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3233 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3235 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3236 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3237 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3239 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3240 		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3243 	/* start the ring */
3244 	gfx_v9_0_cp_gfx_start(adev);
3245 	ring->sched.ready = true;
/*
 * Enable or halt the compute micro-engines (MEC ME1/ME2 via CP_MEC_CNTL).
 * On halt, mark every compute ring and the KIQ ring scheduler not ready.
 */
3250 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3255 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3257 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3258 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3259 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3260 			adev->gfx.compute_ring[i].sched.ready = false;
3261 		adev->gfx.kiq.ring.sched.ready = false;
/*
 * Legacy (non-PSP) MEC firmware load: halt the compute CP, program the
 * MEC instruction-cache base to the pinned firmware BO, copy the jump
 * table through CP_MEC_ME1_UCODE_DATA and record the firmware version.
 * MEC2 is not loaded separately (same image as MEC1).
 */
3266 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3268 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3269 	const __le32 *fw_data;
3273 	if (!adev->gfx.mec_fw)
3276 	gfx_v9_0_cp_compute_enable(adev, false);
3278 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3279 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3281 	fw_data = (const __le32 *)
3282 		(adev->gfx.mec_fw->data +
3283 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3285 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3286 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3287 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3289 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3290 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3291 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3292 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
	/* MEC1 jump table */
3295 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3296 		mec_hdr->jt_offset);
3297 	for (i = 0; i < mec_hdr->jt_size; i++)
3298 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3299 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3301 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3302 		adev->gfx.mec_fw_version);
3303 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
/*
 * Tell the RLC which hardware queue is the KIQ by encoding the ring's
 * me/pipe/queue into RLC_CP_SCHEDULERS.
 */
3309 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3312 	struct amdgpu_device *adev = ring->adev;
3314 	/* tell RLC which is KIQ queue */
3315 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3317 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3318 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3320 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
/*
 * Set the static pipe/queue priority fields in a compute MQD: queues that
 * qualify as high-priority get PIPE_PRIO_HIGH plus the maximum queue
 * priority, and the ring is flagged accordingly.
 */
3323 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3325 	struct amdgpu_device *adev = ring->adev;
3327 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3328 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
3329 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3330 			ring->has_high_prio = true;
3331 			mqd->cp_hqd_queue_priority =
3332 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3334 			ring->has_high_prio = false;
/*
 * Fill in the in-memory MQD (memory queue descriptor) for a KIQ or
 * compute ring: static thread management masks, dynamic CU mask address,
 * EOP buffer base/size, doorbell control, MQD and HQD base addresses,
 * PQ control, rptr-report and wptr-poll writeback addresses, vmid,
 * persistent state, IB control and static priority.  Only the KIQ marks
 * the queue active here; compute queues are activated by map_queues.
 */
3339 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3341 	struct amdgpu_device *adev = ring->adev;
3342 	struct v9_mqd *mqd = ring->mqd_ptr;
3343 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3346 	mqd->header = 0xC0310800;
3347 	mqd->compute_pipelinestat_enable = 0x00000001;
3348 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3349 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3350 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3351 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3352 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3353 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3354 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3355 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3356 	mqd->compute_misc_reserved = 0x00000003;
	/* dynamic CU mask lives inside the MQD allocation itself */
3358 	mqd->dynamic_cu_mask_addr_lo =
3359 		lower_32_bits(ring->mqd_gpu_addr
3360 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3361 	mqd->dynamic_cu_mask_addr_hi =
3362 		upper_32_bits(ring->mqd_gpu_addr
3363 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3365 	eop_base_addr = ring->eop_gpu_addr >> 8;
3366 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3367 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3369 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3370 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3371 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3372 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3374 	mqd->cp_hqd_eop_control = tmp;
3376 	/* enable doorbell? */
3377 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3379 	if (ring->use_doorbell) {
3380 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3381 				    DOORBELL_OFFSET, ring->doorbell_index);
3382 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3384 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3385 				    DOORBELL_SOURCE, 0);
3386 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3389 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3393 	mqd->cp_hqd_pq_doorbell_control = tmp;
3395 	/* disable the queue if it's active */
3397 	mqd->cp_hqd_dequeue_request = 0;
3398 	mqd->cp_hqd_pq_rptr = 0;
3399 	mqd->cp_hqd_pq_wptr_lo = 0;
3400 	mqd->cp_hqd_pq_wptr_hi = 0;
3402 	/* set the pointer to the MQD */
3403 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3404 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3406 	/* set MQD vmid to 0 */
3407 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3408 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3409 	mqd->cp_mqd_control = tmp;
3411 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3412 	hqd_gpu_addr = ring->gpu_addr >> 8;
3413 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3414 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3416 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3417 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3418 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3419 			    (order_base_2(ring->ring_size / 4) - 1));
3420 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3421 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3423 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3425 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3426 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3427 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3428 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3429 	mqd->cp_hqd_pq_control = tmp;
3431 	/* set the wb address whether it's enabled or not */
3432 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3433 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3434 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3435 		upper_32_bits(wb_gpu_addr) & 0xffff;
3437 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3438 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3439 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3440 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3443 	/* enable the doorbell if requested */
3444 	if (ring->use_doorbell) {
3445 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3446 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3447 				DOORBELL_OFFSET, ring->doorbell_index);
3449 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3451 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3452 					DOORBELL_SOURCE, 0);
3453 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3457 	mqd->cp_hqd_pq_doorbell_control = tmp;
3459 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3461 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3463 	/* set the vmid for the queue */
3464 	mqd->cp_hqd_vmid = 0;
3466 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3467 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3468 	mqd->cp_hqd_persistent_state = tmp;
3470 	/* set MIN_IB_AVAIL_SIZE */
3471 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3472 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3473 	mqd->cp_hqd_ib_control = tmp;
3475 	/* set static priority for a queue/ring */
3476 	gfx_v9_0_mqd_set_priority(ring, mqd);
3477 	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
3479 	/* map_queues packet doesn't need activate the queue,
3480 	 * so only kiq need set this field.
3482 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3483 		mqd->cp_hqd_active = 1;
/*
 * Commit a previously initialized MQD to the HQD hardware registers for
 * the KIQ: disable wptr polling, program EOP/doorbell/MQD/HQD bases and
 * control registers from the MQD image, dequeue any active queue first,
 * set the doorbell range, and finally activate the queue.
 * Caller must hold srbm_mutex with the right me/pipe/queue selected.
 */
3488 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3490 	struct amdgpu_device *adev = ring->adev;
3491 	struct v9_mqd *mqd = ring->mqd_ptr;
3494 	/* disable wptr polling */
3495 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3497 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3498 	       mqd->cp_hqd_eop_base_addr_lo);
3499 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3500 	       mqd->cp_hqd_eop_base_addr_hi);
3502 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3503 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3504 	       mqd->cp_hqd_eop_control);
3506 	/* enable doorbell? */
3507 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3508 	       mqd->cp_hqd_pq_doorbell_control);
3510 	/* disable the queue if it's active */
3511 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3512 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3513 		for (j = 0; j < adev->usec_timeout; j++) {
3514 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3518 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3519 		       mqd->cp_hqd_dequeue_request);
3520 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3521 		       mqd->cp_hqd_pq_rptr);
3522 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3523 		       mqd->cp_hqd_pq_wptr_lo);
3524 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3525 		       mqd->cp_hqd_pq_wptr_hi);
3528 	/* set the pointer to the MQD */
3529 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3530 	       mqd->cp_mqd_base_addr_lo);
3531 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3532 	       mqd->cp_mqd_base_addr_hi);
3534 	/* set MQD vmid to 0 */
3535 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3536 	       mqd->cp_mqd_control);
3538 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3539 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3540 	       mqd->cp_hqd_pq_base_lo);
3541 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3542 	       mqd->cp_hqd_pq_base_hi);
3544 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3545 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3546 	       mqd->cp_hqd_pq_control);
3548 	/* set the wb address whether it's enabled or not */
3549 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3550 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3551 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3552 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3554 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3555 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3556 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3557 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3558 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3560 	/* enable the doorbell if requested */
3561 	if (ring->use_doorbell) {
3562 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3563 					(adev->doorbell_index.kiq * 2) << 2);
3564 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3565 					(adev->doorbell_index.userqueue_end * 2) << 2);
3568 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3569 	       mqd->cp_hqd_pq_doorbell_control);
3571 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3572 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3573 	       mqd->cp_hqd_pq_wptr_lo);
3574 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3575 	       mqd->cp_hqd_pq_wptr_hi);
3577 	/* set the vmid for the queue */
3578 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3580 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3581 	       mqd->cp_hqd_persistent_state);
3583 	/* activate the queue */
3584 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3585 	       mqd->cp_hqd_active);
3587 	if (ring->use_doorbell)
3588 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
/*
 * Tear down the KIQ HQD: request a dequeue and poll until the queue goes
 * inactive (forcing CP_HQD_ACTIVE off if it times out), then clear the
 * HQD timer/IB/persistent-state/doorbell/rptr/wptr registers.
 * Caller must hold srbm_mutex with the right me/pipe/queue selected.
 */
3593 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3595 	struct amdgpu_device *adev = ring->adev;
3598 	/* disable the queue if it's active */
3599 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3601 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3603 		for (j = 0; j < adev->usec_timeout; j++) {
3604 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3609 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3610 			DRM_DEBUG("KIQ dequeue request failed.\n");
3612 			/* Manual disable if dequeue request times out */
3613 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3616 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3620 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3621 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3622 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3623 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3624 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3625 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3626 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3627 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
/*
 * Initialize the KIQ queue.  After a GPU reset the saved MQD backup is
 * restored and the registers re-committed; on first init the MQD is built
 * from scratch (full CU/RB dynamic masks), committed to registers and a
 * backup copy is saved.  The GRBM is pointed at the KIQ's me/pipe/queue
 * under srbm_mutex for the register phase.
 */
3632 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3634 	struct amdgpu_device *adev = ring->adev;
3635 	struct v9_mqd *mqd = ring->mqd_ptr;
3636 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3638 	gfx_v9_0_kiq_setting(ring);
3640 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3641 		/* reset MQD to a clean status */
3642 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3643 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3645 		/* reset ring buffer */
3647 		amdgpu_ring_clear_ring(ring);
3649 		mutex_lock(&adev->srbm_mutex);
3650 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3651 		gfx_v9_0_kiq_init_register(ring);
3652 		soc15_grbm_select(adev, 0, 0, 0, 0);
3653 		mutex_unlock(&adev->srbm_mutex);
3655 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3656 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3657 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3658 		mutex_lock(&adev->srbm_mutex);
3659 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3660 		gfx_v9_0_mqd_init(ring);
3661 		gfx_v9_0_kiq_init_register(ring);
3662 		soc15_grbm_select(adev, 0, 0, 0, 0);
3663 		mutex_unlock(&adev->srbm_mutex);
3665 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3666 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
/*
 * Initialize a compute (KCQ) queue's MQD.  First init builds the MQD
 * under srbm_mutex and saves a backup; after a GPU reset the backup is
 * restored and the ring buffer cleared (registers are re-committed via
 * the KIQ map_queues path, not here); otherwise (suspend/resume) the
 * ring is simply cleared.
 */
3672 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3674 	struct amdgpu_device *adev = ring->adev;
3675 	struct v9_mqd *mqd = ring->mqd_ptr;
3676 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3678 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3679 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3680 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3681 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3682 		mutex_lock(&adev->srbm_mutex);
3683 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3684 		gfx_v9_0_mqd_init(ring);
3685 		soc15_grbm_select(adev, 0, 0, 0, 0);
3686 		mutex_unlock(&adev->srbm_mutex);
3688 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3689 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3690 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3691 		/* reset MQD to a clean status */
3692 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3693 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3695 		/* reset ring buffer */
3697 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3698 		amdgpu_ring_clear_ring(ring);
3700 		amdgpu_ring_clear_ring(ring);
/*
 * Resume the KIQ: map its MQD BO, run queue init, unmap and mark the
 * ring scheduler ready.  Returns the first error from reserving or
 * mapping the MQD object.
 */
3706 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3708 	struct amdgpu_ring *ring;
3711 	ring = &adev->gfx.kiq.ring;
3713 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3714 	if (unlikely(r != 0))
3717 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3718 	if (unlikely(r != 0))
3721 	gfx_v9_0_kiq_init_queue(ring);
3722 	amdgpu_bo_kunmap(ring->mqd_obj);
3723 	ring->mqd_ptr = NULL;
3724 	amdgpu_bo_unreserve(ring->mqd_obj);
3725 	ring->sched.ready = true;
/*
 * Resume all compute queues: enable the compute CP, initialize each
 * ring's MQD (map, init, unmap), then ask the KIQ to map the KCQs.
 */
3729 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3731 	struct amdgpu_ring *ring = NULL;
3734 	gfx_v9_0_cp_compute_enable(adev, true);
3736 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3737 		ring = &adev->gfx.compute_ring[i];
3739 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3740 		if (unlikely(r != 0))
3742 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3744 			r = gfx_v9_0_kcq_init_queue(ring);
3745 			amdgpu_bo_kunmap(ring->mqd_obj);
3746 			ring->mqd_ptr = NULL;
3748 		amdgpu_bo_unreserve(ring->mqd_obj);
3753 	r = amdgpu_gfx_enable_kcq(adev);
/*
 * Resume the whole command processor: optionally load gfx/compute
 * microcode in legacy mode (Arcturus has no gfx ring, so it skips the
 * gfx half), bring up KIQ, gfx and compute queues, ring-test them, and
 * re-enable the GUI idle interrupt.
 */
3758 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3761 	struct amdgpu_ring *ring;
3763 	if (!(adev->flags & AMD_IS_APU))
3764 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3766 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3767 		if (adev->asic_type != CHIP_ARCTURUS) {
3768 			/* legacy firmware loading */
3769 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3774 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3779 	r = gfx_v9_0_kiq_resume(adev);
3783 	if (adev->asic_type != CHIP_ARCTURUS) {
3784 		r = gfx_v9_0_cp_gfx_resume(adev);
3789 	r = gfx_v9_0_kcq_resume(adev);
3793 	if (adev->asic_type != CHIP_ARCTURUS) {
3794 		ring = &adev->gfx.gfx_ring[0];
3795 		r = amdgpu_ring_test_helper(ring);
3800 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3801 		ring = &adev->gfx.compute_ring[i];
3802 		amdgpu_ring_test_helper(ring);
3805 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
/*
 * gfx_v9_0_init_tcp_config() - mirror the DF address-hashing status
 * (64K/2M/1G page hash enables) into TCP_ADDR_CONFIG.  Arcturus only;
 * other ASICs return early.
 */
3810 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3814 if (adev->asic_type != CHIP_ARCTURUS)
3817 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3818 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3819 adev->df.hash_status.hash_64k);
3820 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3821 adev->df.hash_status.hash_2m);
3822 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3823 adev->df.hash_status.hash_1g);
3824 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
/*
 * gfx_v9_0_cp_enable() - enable/disable both CP engines.  The GFX engine
 * is skipped on Arcturus, which has no graphics ring.
 */
3827 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3829 if (adev->asic_type != CHIP_ARCTURUS)
3830 gfx_v9_0_cp_gfx_enable(adev, enable);
3831 gfx_v9_0_cp_compute_enable(adev, enable);
/*
 * gfx_v9_0_hw_init() - IP-block hw_init hook: program golden registers
 * (bare metal only), GRBM/SH constants and TCP config, then resume the
 * RLC followed by the CP.
 */
3834 static int gfx_v9_0_hw_init(void *handle)
3837 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* under SR-IOV the host owns the golden register programming */
3839 if (!amdgpu_sriov_vf(adev))
3840 gfx_v9_0_init_golden_registers(adev);
3842 gfx_v9_0_constants_init(adev);
3844 gfx_v9_0_init_tcp_config(adev);
/* RLC must be running before the CP queues are resumed */
3846 r = adev->gfx.rlc.funcs->resume(adev);
3850 r = gfx_v9_0_cp_resume(adev);
/*
 * gfx_v9_0_hw_fini() - IP-block hw_fini hook: drop GFX interrupts,
 * unmap compute queues, quiesce SR-IOV polling, tear down the KIQ
 * registers on a full unbind, and finally stop the CP and RLC.
 */
3857 static int gfx_v9_0_hw_fini(void *handle)
3859 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3861 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3862 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3863 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3865 /* DF freeze and kcq disable will fail */
3866 if (!amdgpu_ras_intr_triggered())
3867 /* disable KCQ to avoid CPC touch memory not valid anymore */
3868 amdgpu_gfx_disable_kcq(adev);
3870 if (amdgpu_sriov_vf(adev)) {
3871 gfx_v9_0_cp_gfx_enable(adev, false);
3872 /* must disable polling for SRIOV when hw finished, otherwise
3873 * CPC engine may still keep fetching WB address which is already
3874 * invalid after sw finished and trigger DMAR reading error in
3877 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3881 /* Use deinitialize sequence from CAIL when unbinding device from driver,
3882 * otherwise KIQ is hanging when binding back
3884 if (!adev->in_gpu_reset && !adev->in_suspend) {
/* select the KIQ's me/pipe/queue before touching its registers */
3885 mutex_lock(&adev->srbm_mutex);
3886 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3887 adev->gfx.kiq.ring.pipe,
3888 adev->gfx.kiq.ring.queue, 0);
3889 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3890 soc15_grbm_select(adev, 0, 0, 0, 0);
3891 mutex_unlock(&adev->srbm_mutex);
3894 gfx_v9_0_cp_enable(adev, false);
3895 adev->gfx.rlc.funcs->stop(adev);
/* gfx_v9_0_suspend() - suspend hook; identical to hw_fini for this block */
3900 static int gfx_v9_0_suspend(void *handle)
3902 return gfx_v9_0_hw_fini(handle);
/* gfx_v9_0_resume() - resume hook; identical to hw_init for this block */
3905 static int gfx_v9_0_resume(void *handle)
3907 return gfx_v9_0_hw_init(handle);
/*
 * gfx_v9_0_is_idle() - report GFX idle state based on the GUI_ACTIVE bit
 * of GRBM_STATUS (set while the graphics pipe is busy).
 */
3910 static bool gfx_v9_0_is_idle(void *handle)
3912 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3914 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3915 GRBM_STATUS, GUI_ACTIVE))
/*
 * gfx_v9_0_wait_for_idle() - poll gfx_v9_0_is_idle() up to
 * adev->usec_timeout iterations; times out otherwise.
 */
3921 static int gfx_v9_0_wait_for_idle(void *handle)
3924 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3926 for (i = 0; i < adev->usec_timeout; i++) {
3927 if (gfx_v9_0_is_idle(handle))
/*
 * gfx_v9_0_soft_reset() - soft-reset the GFX block.
 *
 * Inspects GRBM_STATUS/GRBM_STATUS2 to decide which soft-reset bits
 * (CP, GFX, RLC) are needed, halts the RLC and both CP engines, then
 * pulses the selected bits in GRBM_SOFT_RESET (set, read back, clear).
 */
3934 static int gfx_v9_0_soft_reset(void *handle)
3936 u32 grbm_soft_reset = 0;
3938 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* any busy fixed-function block implies a full CP + GFX reset */
3941 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3942 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3943 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3944 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3945 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3946 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3947 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3948 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3949 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3950 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3951 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
/* CP busy on its own only needs the CP reset bit */
3954 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3955 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3956 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3960 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3961 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3962 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3963 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3966 if (grbm_soft_reset) {
3968 adev->gfx.rlc.funcs->stop(adev);
3970 if (adev->asic_type != CHIP_ARCTURUS)
3971 /* Disable GFX parsing/prefetching */
3972 gfx_v9_0_cp_gfx_enable(adev, false);
3974 /* Disable MEC parsing/prefetching */
3975 gfx_v9_0_cp_compute_enable(adev, false);
3977 if (grbm_soft_reset) {
3978 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3979 tmp |= grbm_soft_reset;
3980 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3981 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
/* read back to make sure the write landed before de-asserting */
3982 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3986 tmp &= ~grbm_soft_reset;
3987 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3988 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3991 /* Wait a little for things to settle down */
/*
 * gfx_v9_0_kiq_read_clock() - read the 64-bit GPU clock counter through
 * the KIQ, for cases where direct MMIO access is not possible (SR-IOV).
 *
 * Emits a PACKET3_COPY_DATA from the clock register to a writeback slot,
 * then polls the fence; retries up to MAX_KIQ_REG_TRY with sleeps, and
 * bails out immediately during GPU reset or in IRQ context.
 */
3997 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
3999 signed long r, cnt = 0;
4000 unsigned long flags;
4002 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4003 struct amdgpu_ring *ring = &kiq->ring;
4005 BUG_ON(!ring->funcs->emit_rreg);
4007 spin_lock_irqsave(&kiq->ring_lock, flags);
4008 amdgpu_ring_alloc(ring, 32);
/* COPY_DATA: register source -> memory destination, 64-bit, confirmed */
4009 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4010 amdgpu_ring_write(ring, 9 | /* src: register*/
4011 (5 << 8) | /* dst: memory */
4012 (1 << 16) | /* count sel */
4013 (1 << 20)); /* write confirm */
4014 amdgpu_ring_write(ring, 0);
4015 amdgpu_ring_write(ring, 0);
/* destination: KIQ's reserved writeback slot (dword offsets * 4 bytes) */
4016 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4017 kiq->reg_val_offs * 4));
4018 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4019 kiq->reg_val_offs * 4));
4020 amdgpu_fence_emit_polling(ring, &seq);
4021 amdgpu_ring_commit(ring);
4022 spin_unlock_irqrestore(&kiq->ring_lock, flags);
4024 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4026 /* don't wait anymore for gpu reset case because this way may
4027 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
4028 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
4029 * never return if we keep waiting in virt_kiq_rreg, which cause
4030 * gpu_recover() hang there.
4032 * also don't wait anymore for IRQ context
4034 if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
4035 goto failed_kiq_read;
4038 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4039 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4040 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4043 if (cnt > MAX_KIQ_REG_TRY)
4044 goto failed_kiq_read;
/* assemble the 64-bit value from the two writeback dwords */
4046 return (uint64_t)adev->wb.wb[kiq->reg_val_offs] |
4047 (uint64_t)adev->wb.wb[kiq->reg_val_offs + 1 ] << 32ULL;
4050 pr_err("failed to read gpu clock\n");
/*
 * gfx_v9_0_get_gpu_clock_counter() - sample the free-running GPU clock
 * counter.  GFXOFF is disabled around the read so the GFX block is
 * powered; Vega10 under SR-IOV runtime must go through the KIQ, all
 * other paths latch the counter via RLC_CAPTURE_GPU_CLOCK_COUNT and
 * read the LSB/MSB register pair directly.
 */
4054 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4058 amdgpu_gfx_off_ctrl(adev, false);
4059 mutex_lock(&adev->gfx.gpu_clock_mutex);
4060 if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
4061 clock = gfx_v9_0_kiq_read_clock(adev);
4063 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4064 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4065 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4067 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4068 amdgpu_gfx_off_ctrl(adev, true);
/*
 * gfx_v9_0_ring_emit_gds_switch() - emit ring commands that program the
 * per-VMID GDS base/size, GWS allocation and OA mask registers.
 * GDS_VMID registers are laid out as base/size pairs, hence the
 * "+ 2 * vmid" stride; GWS and OA use one register per VMID.
 */
4072 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4074 uint32_t gds_base, uint32_t gds_size,
4075 uint32_t gws_base, uint32_t gws_size,
4076 uint32_t oa_base, uint32_t oa_size)
4078 struct amdgpu_device *adev = ring->adev;
4081 gfx_v9_0_write_data_to_reg(ring, 0, false,
4082 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4086 gfx_v9_0_write_data_to_reg(ring, 0, false,
4087 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4091 gfx_v9_0_write_data_to_reg(ring, 0, false,
4092 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4093 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
/* OA: build a contiguous bitmask of oa_size bits starting at oa_base */
4096 gfx_v9_0_write_data_to_reg(ring, 0, false,
4097 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4098 (1 << (oa_size + oa_base)) - (1 << oa_base));
/*
 * Raw GFX9 ISA dwords for the compute shader used by the EDC GPR
 * workaround to initialize VGPRs (non-Arcturus ASICs).  Do not edit by
 * hand; regenerate from the shader source if changes are needed.
 */
4101 static const u32 vgpr_init_compute_shader[] =
4103 0xb07c0000, 0xbe8000ff,
4104 0x000000f8, 0xbf110800,
4105 0x7e000280, 0x7e020280,
4106 0x7e040280, 0x7e060280,
4107 0x7e080280, 0x7e0a0280,
4108 0x7e0c0280, 0x7e0e0280,
4109 0x80808800, 0xbe803200,
4110 0xbf84fff5, 0xbf9c0000,
4111 0xd28c0001, 0x0001007f,
4112 0xd28d0001, 0x0002027e,
4113 0x10020288, 0xb8810904,
4114 0xb7814000, 0xd1196a01,
4115 0x00000301, 0xbe800087,
4116 0xbefc00c1, 0xd89c4000,
4117 0x00020201, 0xd89cc080,
4118 0x00040401, 0x320202ff,
4119 0x00000800, 0x80808100,
4120 0xbf84fff8, 0x7e020280,
4121 0xbf810000, 0x00000000,
/*
 * Raw GFX9 ISA dwords for the compute shader used by the EDC GPR
 * workaround to initialize SGPRs.  Do not edit by hand.
 */
4124 static const u32 sgpr_init_compute_shader[] =
4126 0xb07c0000, 0xbe8000ff,
4127 0x0000005f, 0xbee50080,
4128 0xbe812c65, 0xbe822c65,
4129 0xbe832c65, 0xbe842c65,
4130 0xbe852c65, 0xb77c0005,
4131 0x80808500, 0xbf84fff8,
4132 0xbe800080, 0xbf810000,
/*
 * Arcturus variant of the VGPR-init shader for the EDC GPR workaround.
 * The long run of 0xd3d940NN/0x18000080 pairs appears to touch each of
 * the 256 accumulation VGPRs in turn (Arcturus-only register file)
 * before the common init loop at the end.  Do not edit by hand.
 */
4135 static const u32 vgpr_init_compute_shader_arcturus[] = {
4136 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4137 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4138 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4139 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4140 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4141 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4142 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4143 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4144 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4145 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4146 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4147 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4148 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4149 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4150 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4151 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4152 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4153 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4154 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4155 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4156 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4157 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4158 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4159 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4160 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4161 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4162 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4163 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4164 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4165 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4166 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4167 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4168 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4169 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4170 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4171 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4172 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4173 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4174 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4175 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4176 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4177 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4178 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4179 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4180 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4181 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4182 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4183 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4184 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4185 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4186 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4187 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4188 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4189 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4190 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4191 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4192 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4193 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4194 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4195 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4196 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4197 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4198 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4199 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4200 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4201 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4202 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4203 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4204 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4205 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4206 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4207 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4208 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4209 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4210 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4211 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4212 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4213 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4214 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4215 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4216 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4217 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4218 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4219 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4220 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4221 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4222 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4223 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4224 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4225 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4226 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4227 0xbf84fff8, 0xbf810000,
4230 /* When the register arrays below are changed, please also update gpr_reg_size
4231    and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds()
4232    so that all gfx9 ASICs remain covered. */
/*
 * Compute dispatch register state for the VGPR-init pass of the EDC GPR
 * workaround (non-Arcturus): 64x4 threads, all SEs/CUs enabled.
 */
4233 static const struct soc15_reg_entry vgpr_init_regs[] = {
4234 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4235 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4236 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4237 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4238 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4239 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4240 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4241 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4242 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4243 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4244 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4245 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4246 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4247 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
/*
 * Arcturus variant of vgpr_init_regs — differs only in COMPUTE_PGM_RSRC1
 * (0xbf vs 0x3f), matching the larger Arcturus VGPR footprint.
 */
4250 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4251 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4252 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4253 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4254 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4255 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4256 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4257 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4258 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4259 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4260 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4261 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4262 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4263 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4264 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
/*
 * Dispatch register state for the first SGPR-init pass: 64x8 threads,
 * CU mask 0x000000ff (first CU group per SE).
 */
4267 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4268 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4269 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4270 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4271 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4272 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4273 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4274 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4275 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4276 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4277 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4278 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4279 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4280 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4281 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
/*
 * Dispatch register state for the second SGPR-init pass — same as
 * sgpr1_init_regs but with CU mask 0x0000ff00 (second CU group per SE).
 */
4284 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4285 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4286 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4287 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4288 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4289 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4290 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4291 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4292 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4293 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4294 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4295 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4296 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4297 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4298 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
/*
 * Table of GFX EDC (error detection and correction) counter registers.
 * The trailing two fields of each entry appear to be the per-SE and
 * per-instance counts used when iterating/clearing the counters —
 * confirm against soc15_reg_entry's definition.
 */
4301 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4302 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4303 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4304 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4305 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4306 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4307 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4308 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4309 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4310 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4311 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4312 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4313 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4314 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4315 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4316 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4317 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4318 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4319 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4320 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4321 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4322 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4323 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4324 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4325 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4326 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4327 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4328 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4329 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4330 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4331 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4332 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4333 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4334 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
/*
 * gfx_v9_0_do_edc_gds_workarounds() - clear GDS memory via a CP DMA so
 * its ECC/EDC state is initialized.  RAS-enabled configurations only.
 *
 * Emits a DMA_DATA packet that writes zero-fill data into GDS
 * (DST_SEL(1) = GDS, SRC_SEL(2) = immediate data) covering the full
 * GDS size, then busy-waits until the compute ring drains.
 */
4337 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4339 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4342 /* only support when RAS is enabled */
4343 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4346 r = amdgpu_ring_alloc(ring, 7);
4348 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
/* expose the whole GDS aperture to VMID0 for the clear */
4353 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4354 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4356 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4357 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4358 PACKET3_DMA_DATA_DST_SEL(1) |
4359 PACKET3_DMA_DATA_SRC_SEL(2) |
4360 PACKET3_DMA_DATA_ENGINE(0)));
4361 amdgpu_ring_write(ring, 0);
4362 amdgpu_ring_write(ring, 0);
4363 amdgpu_ring_write(ring, 0);
4364 amdgpu_ring_write(ring, 0);
4365 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4366 adev->gds.gds_size);
4368 amdgpu_ring_commit(ring);
/* poll until the ring has consumed everything we submitted */
4370 for (i = 0; i < adev->usec_timeout; i++) {
4371 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4376 if (i >= adev->usec_timeout)
/* revoke VMID0's GDS window again */
4379 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
/*
 * gfx_v9_0_do_edc_gpr_workarounds() - initialize all VGPRs and SGPRs by
 * running three compute dispatches, so GPR ECC state starts out clean.
 * RAS-enabled configurations only; requires a ready compute ring.
 *
 * Builds one indirect buffer containing, in order: a VGPR-init dispatch
 * (Arcturus uses its own shader/regs), then two SGPR-init dispatches
 * targeting different CU groups (sgpr1/sgpr2 register sets), each
 * followed by a CS partial flush.  The IB is submitted and waited on
 * synchronously.
 */
4384 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4386 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4387 struct amdgpu_ib ib;
4388 struct dma_fence *f = NULL;
4390 unsigned total_size, vgpr_offset, sgpr_offset;
/* one workgroup per CU across all shader engines */
4393 int compute_dim_x = adev->gfx.config.max_shader_engines *
4394 adev->gfx.config.max_cu_per_sh *
4395 adev->gfx.config.max_sh_per_se;
4396 int sgpr_work_group_size = 5;
4397 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4398 int vgpr_init_shader_size;
4399 const u32 *vgpr_init_shader_ptr;
4400 const struct soc15_reg_entry *vgpr_init_regs_ptr;
4402 /* only support when RAS is enabled */
4403 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4406 /* bail if the compute ring is not ready */
4407 if (!ring->sched.ready)
/* pick the per-ASIC VGPR-init shader and dispatch register set */
4410 if (adev->asic_type == CHIP_ARCTURUS) {
4411 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4412 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4413 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4415 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4416 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4417 vgpr_init_regs_ptr = vgpr_init_regs;
/* size the IB: 3 dispatch sequences of (regs + PGM_LO/HI + dispatch + flush) */
4421 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4423 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4425 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4426 total_size = ALIGN(total_size, 256);
4427 vgpr_offset = total_size;
4428 total_size += ALIGN(vgpr_init_shader_size, 256);
4429 sgpr_offset = total_size;
4430 total_size += sizeof(sgpr_init_compute_shader);
4432 /* allocate an indirect buffer to put the commands in */
4433 memset(&ib, 0, sizeof(ib));
4434 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4436 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4440 /* load the compute shaders */
4441 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4442 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4444 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4445 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4447 /* init the ib length to 0 */
/* ---- dispatch 1: VGPR init ---- */
4451 /* write the register state for the compute dispatch */
4452 for (i = 0; i < gpr_reg_size; i++) {
4453 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4454 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4455 - PACKET3_SET_SH_REG_START;
4456 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4458 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4459 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4460 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4461 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4462 - PACKET3_SET_SH_REG_START;
4463 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4464 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4466 /* write dispatch packet */
4467 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4468 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4469 ib.ptr[ib.length_dw++] = 1; /* y */
4470 ib.ptr[ib.length_dw++] = 1; /* z */
4471 ib.ptr[ib.length_dw++] =
4472 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4474 /* write CS partial flush packet */
4475 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4476 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* ---- dispatch 2: SGPR init, first CU group ---- */
4479 /* write the register state for the compute dispatch */
4480 for (i = 0; i < gpr_reg_size; i++) {
4481 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4482 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4483 - PACKET3_SET_SH_REG_START;
4484 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4486 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4487 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4488 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4489 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4490 - PACKET3_SET_SH_REG_START;
4491 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4492 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4494 /* write dispatch packet */
4495 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4496 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4497 ib.ptr[ib.length_dw++] = 1; /* y */
4498 ib.ptr[ib.length_dw++] = 1; /* z */
4499 ib.ptr[ib.length_dw++] =
4500 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4502 /* write CS partial flush packet */
4503 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4504 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* ---- dispatch 3: SGPR init, second CU group ---- */
4507 /* write the register state for the compute dispatch */
4508 for (i = 0; i < gpr_reg_size; i++) {
4509 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4510 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4511 - PACKET3_SET_SH_REG_START;
4512 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4514 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4515 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4516 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4517 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4518 - PACKET3_SET_SH_REG_START;
4519 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4520 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4522 /* write dispatch packet */
4523 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4524 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4525 ib.ptr[ib.length_dw++] = 1; /* y */
4526 ib.ptr[ib.length_dw++] = 1; /* z */
4527 ib.ptr[ib.length_dw++] =
4528 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4530 /* write CS partial flush packet */
4531 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4532 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4534 /* schedule the IB on the ring */
4535 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4537 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4541 /* wait for the GPU to finish processing the IB */
4542 r = dma_fence_wait(f, false);
4544 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4549 amdgpu_ib_free(adev, &ib, NULL);
/*
 * gfx_v9_0_early_init() - early IP-block setup: pick ring counts
 * (Arcturus has no GFX rings) and install the function tables.
 */
4555 static int gfx_v9_0_early_init(void *handle)
4557 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4559 if (adev->asic_type == CHIP_ARCTURUS)
4560 adev->gfx.num_gfx_rings = 0;
4562 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4563 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4564 gfx_v9_0_set_kiq_pm4_funcs(adev);
4565 gfx_v9_0_set_ring_funcs(adev);
4566 gfx_v9_0_set_irq_funcs(adev);
4567 gfx_v9_0_set_gds_init(adev);
4568 gfx_v9_0_set_rlc_funcs(adev);
/*
 * gfx_v9_0_ecc_late_init() - run the EDC/ECC init workarounds (GDS clear
 * on cold boot only, GPR init shaders), reset the RAS error counters and
 * finish generic GFX RAS late init.
 */
4573 static int gfx_v9_0_ecc_late_init(void *handle)
4575 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4579 * Temp workaround to fix the issue that CP firmware fails to
4580 * update read pointer when CPDMA is writing clearing operation
4581 * to GDS in suspend/resume sequence on several cards. So just
4582 * limit this operation in cold boot sequence.
4584 if (!adev->in_suspend) {
4585 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4590 /* requires IBs so do in late init after IB pool is initialized */
4591 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
/* start from a clean error count after the init dispatches */
4595 if (adev->gfx.funcs &&
4596 adev->gfx.funcs->reset_ras_error_count)
4597 adev->gfx.funcs->reset_ras_error_count(adev);
4599 r = amdgpu_gfx_ras_late_init(adev);
/*
 * gfx_v9_0_late_init() - enable the privileged register/instruction
 * interrupts and run the ECC late-init sequence.
 */
4606 static int gfx_v9_0_late_init(void *handle)
4608 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4611 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4615 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4619 r = gfx_v9_0_ecc_late_init(handle);
/*
 * gfx_v9_0_is_rlc_enabled() - check the RLC_ENABLE_F32 bit of RLC_CNTL
 * to tell whether the RLC microcontroller is running.
 */
4626 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4628 uint32_t rlc_setting;
4630 /* if RLC is not enabled, do nothing */
4631 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4632 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
/* Request RLC safe mode: write the CMD+MESSAGE handshake to RLC_SAFE_MODE,
 * then poll (up to adev->usec_timeout iterations) until the CMD field
 * clears, indicating the RLC acknowledged the request.
 */
4638 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4643 data = RLC_SAFE_MODE__CMD_MASK;
4644 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4645 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4647 /* wait for RLC_SAFE_MODE */
4648 for (i = 0; i < adev->usec_timeout; i++) {
4649 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
/* Leave RLC safe mode: write CMD only (MESSAGE = 0) — no ack poll here,
 * unlike gfx_v9_0_set_safe_mode above.
 */
4655 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4659 data = RLC_SAFE_MODE__CMD_MASK;
4660 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
/* Enable/disable GFX coarse-grain power gating (and pipeline PG when the
 * PG flag is set), bracketed by RLC safe mode to keep the RLC quiescent
 * while the PG controls are reprogrammed.
 */
4663 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4666 amdgpu_gfx_rlc_enter_safe_mode(adev);
4668 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4669 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4670 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4671 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
/* disable path (else branch missing from this extraction) */
4673 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4674 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4675 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4678 amdgpu_gfx_rlc_exit_safe_mode(adev);
/* Enable/disable static (SMG) and dynamic (DMG) medium-grain power gating
 * according to pg_flags. Safe-mode bracketing is intentionally commented
 * out pending the TODO below.
 */
4681 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4684 /* TODO: double check if we need to perform under safe mode */
4685 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4687 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4688 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4690 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4692 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4693 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4695 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4697 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
/* Program medium-grain clock gating (MGCG) and memory light sleep (MGLS)
 * for RLC and CP. Enable path: clear the MGCG override bits, then enable
 * RLC/CP memory light sleep per cg_flags. Disable path: set the override
 * bits and clear both light-sleep enables. The whole sequence runs under
 * RLC safe mode; register writes are ordered, do not reorder.
 */
4700 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4705 amdgpu_gfx_rlc_enter_safe_mode(adev);
4707 /* It is disabled by HW by default */
4708 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4709 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4710 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
/* Vega12 keeps the CPF SCLK override set; all others clear it */
4712 if (adev->asic_type != CHIP_VEGA12)
4713 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4715 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4716 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4717 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4719 /* only for Vega10 & Raven1 */
4720 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4723 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4725 /* MGLS is a global flag to control all MGLS in GFX */
4726 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4727 /* 2 - RLC memory Light sleep */
4728 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4729 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4730 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4732 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4734 /* 3 - CP memory Light sleep */
4735 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4736 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4737 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4739 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
/* disable path: force the overrides back on, then turn MGLS off */
4743 /* 1 - MGCG_OVERRIDE */
4744 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4746 if (adev->asic_type != CHIP_VEGA12)
4747 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4749 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4750 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4751 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4752 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4755 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4757 /* 2 - disable MGLS in RLC */
4758 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4759 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4760 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4761 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4764 /* 3 - disable MGLS in CP */
4765 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4766 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4767 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4768 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4772 amdgpu_gfx_rlc_exit_safe_mode(adev);
/* Program 3D-pipe coarse-grain clock gating (3D CGCG/CGLS). Skipped
 * entirely on Arcturus (no gfx/3D pipe). Enable: clear the GFX3D CG
 * override, program the CGCG FSM thresholds (0x0000363f) and the CP
 * wptr IDLE_POLL_COUNT (0x00900100). Disable: clear the CGCG/CGLS
 * enable bits in the FSM. All under RLC safe mode.
 */
4775 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4780 if (adev->asic_type == CHIP_ARCTURUS)
4783 amdgpu_gfx_rlc_enter_safe_mode(adev);
4785 /* Enable 3D CGCG/CGLS */
4786 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4787 /* write cmd to clear cgcg/cgls ov */
4788 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4789 /* unset CGCG override */
4790 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4791 /* update CGCG and CGLS override bits */
4793 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4795 /* enable 3Dcgcg FSM(0x0000363f) */
4796 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4798 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4799 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4800 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4801 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4802 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4804 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4806 /* set IDLE_POLL_COUNT(0x00900100) */
4807 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4808 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4809 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4811 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4813 /* Disable CGCG/CGLS */
4814 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4815 /* disable cgcg, cgls should be disabled */
4816 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4817 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4818 /* disable cgcg and cgls in FSM */
4820 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4823 amdgpu_gfx_rlc_exit_safe_mode(adev);
/* Program coarse-grain clock gating (CGCG) and CGLS for the gfx pipe.
 * Enable: clear the CGCG/CGLS overrides, program the CGCG FSM (Arcturus
 * uses a larger idle threshold, 0x2000, than the other ASICs' 0x36) and
 * the CP wptr IDLE_POLL_COUNT. Disable: clear the FSM enable bits.
 * Runs under RLC safe mode.
 */
4826 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4831 amdgpu_gfx_rlc_enter_safe_mode(adev);
4833 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4834 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4835 /* unset CGCG override */
4836 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4837 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4838 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4840 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4841 /* update CGCG and CGLS override bits */
4843 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4845 /* enable cgcg FSM(0x0000363F) */
4846 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4848 if (adev->asic_type == CHIP_ARCTURUS)
4849 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4850 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4852 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4853 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4854 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4855 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4856 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4858 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4860 /* set IDLE_POLL_COUNT(0x00900100) */
4861 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4862 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4863 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4865 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4867 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4868 /* reset CGCG/CGLS bits */
4869 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4870 /* disable cgcg and cgls in FSM */
4872 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4875 amdgpu_gfx_rlc_exit_safe_mode(adev);
/* Ordering wrapper for all gfx clock gating: when enabling, MGCG/MGLS
 * must come before CGCG/CGLS; when disabling, the reverse order — hence
 * the two mirrored call sequences below.
 */
4878 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4882 /* CGCG/CGLS should be enabled after MGCG/MGLS
4883 * === MGCG + MGLS ===
4885 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4886 /* === CGCG /CGLS for GFX 3D Only === */
4887 gfx_v9_0_update_3d_clock_gating(adev, enable);
4888 /* === CGCG + CGLS === */
4889 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4891 /* CGCG/CGLS should be disabled before MGCG/MGLS
4892 * === CGCG + CGLS ===
4894 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4895 /* === CGCG /CGLS for GFX 3D Only === */
4896 gfx_v9_0_update_3d_clock_gating(adev, enable);
4897 /* === MGCG + MGLS === */
4898 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
/* Read-modify-write the RLC_SPM_VMID field of RLC_SPM_MC_CNTL so SPM
 * traffic is attributed to the given VMID.
 */
4903 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
4907 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
4909 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4910 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4912 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
/* Linear scan of an soc15_reg_rlcg table, resolving each entry to an
 * absolute register offset via adev->reg_offset; returns whether the
 * queried offset matches one of the entries (comparison line missing
 * from this extraction).
 */
4915 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
4917 struct soc15_reg_rlcg *entries, int arr_size)
4925 for (i = 0; i < arr_size; i++) {
4926 const struct soc15_reg_rlcg *entry;
4928 entry = &entries[i];
4929 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
/* Convenience wrapper: test @offset against the gc 9.0 RLCG access table. */
4937 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
4939 return gfx_v9_0_check_rlcg_range(adev, offset,
4940 (void *)rlcg_access_gc_9_0,
4941 ARRAY_SIZE(rlcg_access_gc_9_0));
/* RLC function table for gfx v9.0, installed by gfx_v9_0_set_rlc_funcs()
 * in early init.
 */
4944 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4945 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4946 .set_safe_mode = gfx_v9_0_set_safe_mode,
4947 .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4948 .init = gfx_v9_0_rlc_init,
4949 .get_csb_size = gfx_v9_0_get_csb_size,
4950 .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4951 .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4952 .resume = gfx_v9_0_rlc_resume,
4953 .stop = gfx_v9_0_rlc_stop,
4954 .reset = gfx_v9_0_rlc_reset,
4955 .start = gfx_v9_0_rlc_start,
4956 .update_spm_vmid = gfx_v9_0_update_spm_vmid,
4957 .rlcg_wreg = gfx_v9_0_rlcg_wreg,
4958 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
/* Powergating state hook. For the ASIC cases shown: disable GFXOFF while
 * reprogramming, configure SCK slow-down and CP power gating per pg_flags,
 * update CG/MG power gating, then re-enable GFXOFF (or, in the last case,
 * simply pass the requested gate state to gfxoff ctrl). Case labels are
 * missing from this extraction — confirm ASIC branches against the
 * original file.
 */
4961 static int gfx_v9_0_set_powergating_state(void *handle,
4962 enum amd_powergating_state state)
4964 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4965 bool enable = (state == AMD_PG_STATE_GATE);
4967 switch (adev->asic_type) {
4971 amdgpu_gfx_off_ctrl(adev, false);
4973 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4974 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4975 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4977 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4978 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4981 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4982 gfx_v9_0_enable_cp_power_gating(adev, true);
4984 gfx_v9_0_enable_cp_power_gating(adev, false);
4986 /* update gfx cgpg state */
4987 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4989 /* update mgcg state */
4990 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4993 amdgpu_gfx_off_ctrl(adev, true);
4996 amdgpu_gfx_off_ctrl(adev, enable);
/* Clockgating state hook: no-op under SR-IOV; otherwise dispatch by ASIC
 * and apply full gfx clock gating for the requested gate state.
 */
5005 static int gfx_v9_0_set_clockgating_state(void *handle,
5006 enum amd_clockgating_state state)
5008 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5010 if (amdgpu_sriov_vf(adev))
5013 switch (adev->asic_type) {
5020 gfx_v9_0_update_gfx_clock_gating(adev,
5021 state == AMD_CG_STATE_GATE);
/* Report the currently-active clock gating features by reading back the
 * CG control registers through the KIQ and OR-ing the corresponding
 * AMD_CG_SUPPORT_* bits into *flags. Skipped under SR-IOV. The 3D
 * CGCG/CGLS readback is skipped on Arcturus (no 3D pipe registers).
 */
5029 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5031 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5034 if (amdgpu_sriov_vf(adev))
5037 /* AMD_CG_SUPPORT_GFX_MGCG */
5038 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5039 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5040 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5042 /* AMD_CG_SUPPORT_GFX_CGCG */
5043 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5044 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5045 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5047 /* AMD_CG_SUPPORT_GFX_CGLS */
5048 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5049 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5051 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5052 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5053 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5054 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5056 /* AMD_CG_SUPPORT_GFX_CP_LS */
5057 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5058 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5059 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5061 if (adev->asic_type != CHIP_ARCTURUS) {
5062 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5063 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5064 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5065 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5067 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5068 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5069 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
/* Gfx ring read pointer: fetched from the writeback slot (32-bit on gfx9). */
5073 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5075 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
/* Gfx ring write pointer: 64-bit atomic read from the writeback slot when
 * using doorbells, otherwise assembled from the CP_RB0_WPTR/_HI registers.
 */
5078 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5080 struct amdgpu_device *adev = ring->adev;
5083 /* XXX check if swapping is necessary on BE */
5084 if (ring->use_doorbell) {
5085 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5087 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5088 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
/* Gfx ring write pointer commit: store to the writeback slot and ring the
 * doorbell, or fall back to the CP_RB0_WPTR/_HI registers.
 */
5094 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5096 struct amdgpu_device *adev = ring->adev;
5098 if (ring->use_doorbell) {
5099 /* XXX check if swapping is necessary on BE */
5100 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5101 WDOORBELL64(ring->doorbell_index, ring->wptr);
5103 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5104 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
/* Emit an HDP flush: select the NBIO ref/mask bits for this engine
 * (per-pipe cp2/cp6 masks for compute, cp0 + PFP engine for gfx), then
 * wait_reg_mem on the NBIO HDP flush request/done registers.
 */
5108 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5110 struct amdgpu_device *adev = ring->adev;
5111 u32 ref_and_mask, reg_mem_engine;
5112 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5114 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5117 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5120 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5127 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5128 reg_mem_engine = 1; /* pfp */
5131 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5132 adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5133 adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5134 ref_and_mask, ref_and_mask, 0x20);
/* Emit an indirect buffer on the gfx ring: choose CONST vs normal IB
 * header from the CE flag, encode length and vmid into the control word,
 * set preemption enable under SR-IOV (emitting DE metadata for preemptible
 * non-CE IBs), then write the 4-dword PM4 packet.
 */
5137 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5138 struct amdgpu_job *job,
5139 struct amdgpu_ib *ib,
5142 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5143 u32 header, control = 0;
5145 if (ib->flags & AMDGPU_IB_FLAG_CE)
5146 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5148 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5150 control |= ib->length_dw | (vmid << 24);
5152 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5153 control |= INDIRECT_BUFFER_PRE_ENB(1);
5155 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5156 gfx_v9_0_ring_emit_de_meta(ring);
5159 amdgpu_ring_write(ring, header);
5160 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5161 amdgpu_ring_write(ring,
5165 lower_32_bits(ib->gpu_addr));
5166 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5167 amdgpu_ring_write(ring, control);
/* Emit an indirect buffer on a compute ring. Before the IB packet, apply
 * the GDS wave-ID workaround (documented inline below) when requested by
 * the IB flags, by writing GDS_COMPUTE_MAX_WAVE_ID via SET_CONFIG_REG.
 */
5170 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5171 struct amdgpu_job *job,
5172 struct amdgpu_ib *ib,
5175 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5176 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5178 /* Currently, there is a high possibility to get wave ID mismatch
5179 * between ME and GDS, leading to a hw deadlock, because ME generates
5180 * different wave IDs than the GDS expects. This situation happens
5181 * randomly when at least 5 compute pipes use GDS ordered append.
5182 * The wave IDs generated by ME are also wrong after suspend/resume.
5183 * Those are probably bugs somewhere else in the kernel driver.
5185 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5186 * GDS to 0 for this ring (me/pipe).
5188 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5189 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5190 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5191 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5194 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5195 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5196 amdgpu_ring_write(ring,
5200 lower_32_bits(ib->gpu_addr));
5201 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5202 amdgpu_ring_write(ring, control);
/* Emit a fence via RELEASE_MEM: pick the TC cache actions based on the
 * TC_WB_ONLY flag, select 32/64-bit data write and interrupt generation
 * from the fence flags, then write address and sequence value.
 */
5205 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5206 u64 seq, unsigned flags)
5208 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5209 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5210 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5212 /* RELEASE_MEM - flush caches, send int */
5213 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5214 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5215 EOP_TC_NC_ACTION_EN) :
5216 (EOP_TCL1_ACTION_EN |
5218 EOP_TC_WB_ACTION_EN |
5219 EOP_TC_MD_ACTION_EN)) |
5220 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5222 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5225 * the address should be Qword aligned if 64bit write, Dword
5226 * aligned if only send 32bit data low (discard data high)
5232 amdgpu_ring_write(ring, lower_32_bits(addr));
5233 amdgpu_ring_write(ring, upper_32_bits(addr));
5234 amdgpu_ring_write(ring, lower_32_bits(seq));
5235 amdgpu_ring_write(ring, upper_32_bits(seq));
5236 amdgpu_ring_write(ring, 0);
/* Pipeline sync: wait_reg_mem on this ring's fence writeback address
 * until it reaches the latest sync sequence number (PFP engine on gfx
 * rings, ME otherwise).
 */
5239 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5241 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5242 uint32_t seq = ring->fence_drv.sync_seq;
5243 uint64_t addr = ring->fence_drv.gpu_addr;
5245 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5246 lower_32_bits(addr), upper_32_bits(addr),
5247 seq, 0xffffffff, 4);
/* Emit a VM/TLB flush via the common GMC helper, then on gfx rings add a
 * PFP_SYNC_ME so the prefetch parser doesn't run ahead with stale
 * translations.
 */
5250 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5251 unsigned vmid, uint64_t pd_addr)
5253 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5255 /* compute doesn't have PFP */
5256 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5257 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5258 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5259 amdgpu_ring_write(ring, 0x0);
/* Compute ring read pointer from the writeback slot (32-bit on gfx9). */
5263 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5265 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
/* Compute ring write pointer: atomic 64-bit read from the writeback slot
 * when doorbells are in use (non-doorbell fallback missing from view).
 */
5268 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5272 /* XXX check if swapping is necessary on BE */
5273 if (ring->use_doorbell)
5274 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
/* Compute ring write pointer commit: writeback slot + doorbell. Anything
 * other than the doorbell method is a BUG() on gfx9.
 */
5280 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5282 struct amdgpu_device *adev = ring->adev;
5284 /* XXX check if swapping is necessary on BE */
5285 if (ring->use_doorbell) {
5286 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5287 WDOORBELL64(ring->doorbell_index, ring->wptr);
5289 BUG(); /* only DOORBELL method supported on gfx9 now */
/* KIQ fence: WRITE_DATA of the low 32 bits of @seq to @addr (64-bit
 * fences are not allocated for KIQ, hence the BUG_ON), then optionally a
 * second WRITE_DATA to CPC_INT_STATUS to raise the interrupt.
 */
5293 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5294 u64 seq, unsigned int flags)
5296 struct amdgpu_device *adev = ring->adev;
5298 /* we only allocate 32bit for each seq wb address */
5299 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5301 /* write fence seq to the "addr" */
5302 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5303 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5304 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5305 amdgpu_ring_write(ring, lower_32_bits(addr));
5306 amdgpu_ring_write(ring, upper_32_bits(addr));
5307 amdgpu_ring_write(ring, lower_32_bits(seq));
5309 if (flags & AMDGPU_FENCE_FLAG_INT) {
5310 /* set register to trigger INT */
5311 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5312 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5313 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5314 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5315 amdgpu_ring_write(ring, 0);
5316 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
/* Emit a SWITCH_BUFFER packet (payload 0). */
5320 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5322 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5323 amdgpu_ring_write(ring, 0);
/* Write a zeroed CE payload into the CSA (ce_payload slot) via a CE-engine
 * WRITE_DATA. cnt = payload dwords + packet header/address overhead.
 */
5326 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5328 struct v9_ce_ib_state ce_payload = {0};
5332 cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5333 csa_addr = amdgpu_csa_vaddr(ring->adev);
5335 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5336 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5337 WRITE_DATA_DST_SEL(8) |
5339 WRITE_DATA_CACHE_POLICY(0));
5340 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5341 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5342 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
/* Write a DE payload into the CSA (de_payload slot); the GDS backup area
 * is placed 4KiB past the CSA base and its address is recorded in the
 * payload before emission.
 */
5345 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5347 struct v9_de_ib_state de_payload = {0};
5348 uint64_t csa_addr, gds_addr;
5351 csa_addr = amdgpu_csa_vaddr(ring->adev);
5352 gds_addr = csa_addr + 4096;
5353 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5354 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5356 cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5357 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5358 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5359 WRITE_DATA_DST_SEL(8) |
5361 WRITE_DATA_CACHE_POLICY(0));
5362 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5363 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5364 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
/* Emit a FRAME_CONTROL packet: cmd 0 marks frame start, 1 marks frame end. */
5367 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5369 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5370 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
/* Emit CONTEXT_CONTROL: build the dw2 load bitmask from the context-switch
 * and preamble flags (specific bit values are on lines missing from this
 * extraction), preceded by CE metadata under SR-IOV and a frame-start
 * FRAME_CONTROL.
 */
5373 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5377 if (amdgpu_sriov_vf(ring->adev))
5378 gfx_v9_0_ring_emit_ce_meta(ring);
5380 gfx_v9_0_ring_emit_tmz(ring, true);
5382 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5383 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5384 /* set load_global_config & load_global_uconfig */
5386 /* set load_cs_sh_regs */
5388 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5391 /* set load_ce_ram if preamble presented */
5392 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5395 /* still load_ce_ram if this is the first time preamble presented
5396 * although there is no context switch happens.
5398 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5402 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5403 amdgpu_ring_write(ring, dw2);
5404 amdgpu_ring_write(ring, 0);
/* Emit a COND_EXEC packet whose exec-count dword is a placeholder
 * (0x55aa55aa); returns that dword's ring offset so
 * gfx_v9_0_ring_emit_patch_cond_exec() can patch in the real count later.
 */
5407 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5410 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5411 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5412 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5413 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5414 ret = ring->wptr & ring->buf_mask;
5415 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
/* Patch the placeholder written by emit_init_cond_exec with the dword
 * count from the placeholder to the current wptr, accounting for ring
 * wrap-around in the else branch.
 */
5419 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5422 BUG_ON(offset > ring->buf_mask);
5423 BUG_ON(ring->ring[offset] != 0x55aa55aa);
5425 cur = (ring->wptr & ring->buf_mask) - 1;
5426 if (likely(cur > offset))
5427 ring->ring[offset] = cur - offset;
5429 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
/* KIQ register read: COPY_DATA from @reg to the KIQ's reg_val_offs slot
 * in the writeback area, with write confirm, where the caller can read
 * the value back.
 */
5432 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5434 struct amdgpu_device *adev = ring->adev;
5435 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5437 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5438 amdgpu_ring_write(ring, 0 | /* src: register*/
5439 (5 << 8) | /* dst: memory */
5440 (1 << 20)); /* write confirm */
5441 amdgpu_ring_write(ring, reg);
5442 amdgpu_ring_write(ring, 0);
5443 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5444 kiq->reg_val_offs * 4));
5445 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5446 kiq->reg_val_offs * 4));
/* Emit a register write via WRITE_DATA; the command word differs by ring
 * type (PFP engine + confirm on gfx, no-increment on KIQ).
 */
5449 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5454 switch (ring->funcs->type) {
5455 case AMDGPU_RING_TYPE_GFX:
5456 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5458 case AMDGPU_RING_TYPE_KIQ:
5459 cmd = (1 << 16); /* no inc addr */
5465 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5466 amdgpu_ring_write(ring, cmd);
5467 amdgpu_ring_write(ring, reg);
5468 amdgpu_ring_write(ring, 0);
5469 amdgpu_ring_write(ring, val);
/* Emit a register poll: wait until (@reg & @mask) == @val. */
5472 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5473 uint32_t val, uint32_t mask)
5475 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
/* Write reg0 then wait on reg1: use the fused WAIT_REG_MEM form only when
 * the CP firmware supports write+wait (me_fw/mec_fw flag), otherwise fall
 * back to the generic two-packet helper.
 */
5478 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5479 uint32_t reg0, uint32_t reg1,
5480 uint32_t ref, uint32_t mask)
5482 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5483 struct amdgpu_device *adev = ring->adev;
5484 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5485 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5488 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5491 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
/* Soft recovery: issue an SQ_CMD targeting the given VMID (CMD=0x03,
 * MODE=0x01, CHECK_VMID=1) to kill the hung waves without a full reset.
 */
5495 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5497 struct amdgpu_device *adev = ring->adev;
5500 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5501 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5502 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5503 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5504 WREG32_SOC15(GC, 0, mmSQ_CMD, value);
/* Enable/disable the gfx ring's EOP timestamp interrupt in
 * CP_INT_CNTL_RING0.
 */
5507 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5508 enum amdgpu_interrupt_state state)
5511 case AMDGPU_IRQ_STATE_DISABLE:
5512 case AMDGPU_IRQ_STATE_ENABLE:
5513 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5514 TIME_STAMP_INT_ENABLE,
5515 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
/* Enable/disable the EOP timestamp interrupt for one MEC pipe: map
 * (me, pipe) to the per-pipe INT_CNTL register (only MEC1's four pipes
 * are handled here — see comment below), then RMW TIME_STAMP_INT_ENABLE.
 */
5522 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5524 enum amdgpu_interrupt_state state)
5526 u32 mec_int_cntl, mec_int_cntl_reg;
5529 * amdgpu controls only the first MEC. That's why this function only
5530 * handles the setting of interrupts for this specific MEC. All other
5531 * pipes' interrupts are set by amdkfd.
5537 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5540 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5543 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5546 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5549 DRM_DEBUG("invalid pipe %d\n", pipe);
5553 DRM_DEBUG("invalid me %d\n", me);
5558 case AMDGPU_IRQ_STATE_DISABLE:
5559 mec_int_cntl = RREG32(mec_int_cntl_reg);
/* field layout is identical across pipes, so PIPE0's REG_SET_FIELD is reused */
5560 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5561 TIME_STAMP_INT_ENABLE, 0);
5562 WREG32(mec_int_cntl_reg, mec_int_cntl);
5564 case AMDGPU_IRQ_STATE_ENABLE:
5565 mec_int_cntl = RREG32(mec_int_cntl_reg);
5566 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5567 TIME_STAMP_INT_ENABLE, 1);
5568 WREG32(mec_int_cntl_reg, mec_int_cntl);
/* Enable/disable the privileged-register-access fault interrupt in
 * CP_INT_CNTL_RING0.
 */
5575 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5576 struct amdgpu_irq_src *source,
5578 enum amdgpu_interrupt_state state)
5581 case AMDGPU_IRQ_STATE_DISABLE:
5582 case AMDGPU_IRQ_STATE_ENABLE:
5583 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5584 PRIV_REG_INT_ENABLE,
5585 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
/* Enable/disable the privileged-instruction fault interrupt in
 * CP_INT_CNTL_RING0.
 */
5594 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5595 struct amdgpu_irq_src *source,
5597 enum amdgpu_interrupt_state state)
5600 case AMDGPU_IRQ_STATE_DISABLE:
5601 case AMDGPU_IRQ_STATE_ENABLE:
5602 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5603 PRIV_INSTR_INT_ENABLE,
5604 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
/* Helper macros: set/clear CP_ECC_ERROR_INT_ENABLE in the per-(me, pipe)
 * INT_CNTL register via token pasting of the me/pipe numbers.
 */
5612 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \
5613 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5614 CP_ECC_ERROR_INT_ENABLE, 1)
5616 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \
5617 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5618 CP_ECC_ERROR_INT_ENABLE, 0)
/* Enable/disable the CP ECC error interrupt on the gfx ring and on all
 * four MEC1 pipes together.
 */
5620 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5621 struct amdgpu_irq_src *source,
5623 enum amdgpu_interrupt_state state)
5626 case AMDGPU_IRQ_STATE_DISABLE:
5627 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5628 CP_ECC_ERROR_INT_ENABLE, 0);
5629 DISABLE_ECC_ON_ME_PIPE(1, 0);
5630 DISABLE_ECC_ON_ME_PIPE(1, 1);
5631 DISABLE_ECC_ON_ME_PIPE(1, 2);
5632 DISABLE_ECC_ON_ME_PIPE(1, 3);
5635 case AMDGPU_IRQ_STATE_ENABLE:
5636 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5637 CP_ECC_ERROR_INT_ENABLE, 1);
5638 ENABLE_ECC_ON_ME_PIPE(1, 0);
5639 ENABLE_ECC_ON_ME_PIPE(1, 1);
5640 ENABLE_ECC_ON_ME_PIPE(1, 2);
5641 ENABLE_ECC_ON_ME_PIPE(1, 3);
/* Dispatch an EOP interrupt state change to the gfx ring or the matching
 * (MEC, pipe) handler based on the IRQ type enum.
 */
5651 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5652 struct amdgpu_irq_src *src,
5654 enum amdgpu_interrupt_state state)
5657 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5658 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5660 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5661 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5663 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5664 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5666 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5667 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5669 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5670 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5672 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5673 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5675 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5676 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5678 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5679 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5681 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5682 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
/* EOP interrupt handler: decode me/pipe/queue from the IV ring_id bit
 * fields, then process fences on the gfx ring (me 0 path) or on whichever
 * compute ring matches the decoded triple.
 */
5690 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5691 struct amdgpu_irq_src *source,
5692 struct amdgpu_iv_entry *entry)
5695 u8 me_id, pipe_id, queue_id;
5696 struct amdgpu_ring *ring;
5698 DRM_DEBUG("IH: CP EOP\n");
5699 me_id = (entry->ring_id & 0x0c) >> 2;
5700 pipe_id = (entry->ring_id & 0x03) >> 0;
5701 queue_id = (entry->ring_id & 0x70) >> 4;
5705 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5709 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5710 ring = &adev->gfx.compute_ring[i];
5711 /* Per-queue interrupt is supported for MEC starting from VI.
5712 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5714 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5715 amdgpu_fence_process(ring);
/* Common fault path for priv-reg/priv-inst IRQs: decode me/pipe/queue
 * from the IV entry and signal a scheduler fault on the affected ring so
 * the job gets timed out/recovered.
 */
5722 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5723 struct amdgpu_iv_entry *entry)
5725 u8 me_id, pipe_id, queue_id;
5726 struct amdgpu_ring *ring;
5729 me_id = (entry->ring_id & 0x0c) >> 2;
5730 pipe_id = (entry->ring_id & 0x03) >> 0;
5731 queue_id = (entry->ring_id & 0x70) >> 4;
5735 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5739 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5740 ring = &adev->gfx.compute_ring[i];
5741 if (ring->me == me_id && ring->pipe == pipe_id &&
5742 ring->queue == queue_id)
5743 drm_sched_fault(&ring->sched);
/* Privileged register access fault IRQ: log and route to the common
 * fault handler.
 */
5749 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5750 struct amdgpu_irq_src *source,
5751 struct amdgpu_iv_entry *entry)
5753 DRM_ERROR("Illegal register access in command stream\n");
5754 gfx_v9_0_fault(adev, entry);
/*
 * gfx_v9_0_priv_inst_irq - handle an illegal-instruction interrupt.
 * Logs the event and forwards it to the common fault path.  Returns 0.
 */
static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}
/*
 * Per-field EDC (error detect and correct) counter map for the GFX block.
 * Each entry names a sub-block memory, the counter register that holds its
 * error counts, and the SEC (single-error-corrected) and DED (double-error-
 * detected) bitfields within that register.  Memories whose hardware only
 * reports a single parity/SED count carry it in the SEC slot and use 0, 0
 * for the DED mask/shift.
 * NOTE(review): the "0, 0" terminators and closing braces were dropped by
 * the extraction and have been restored from the entry pattern - verify.
 */
static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
	/* CP compute/fetch/graphics (CPC/CPF/CPG) memories */
	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
	},
	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
	},
	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
	  0, 0
	},
	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
	  0, 0
	},
	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
	},
	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
	  0, 0
	},
	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
	},
	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
	},
	/* DC (data cache) memories - SED-only counters */
	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
	  0, 0
	},
	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
	  0, 0
	},
	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
	  0, 0
	},
	/* GDS (global data share) memories */
	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
	},
	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
	  0, 0
	},
	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
	},
	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
	},
	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
	  0, 0
	},
	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
	},
	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
	},
	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
	},
	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
	},
	/* SPI save/restore memory */
	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
	  0, 0
	},
	/* TA (texture addresser) FIFOs */
	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
	},
	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
	  0, 0
	},
	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
	  0, 0
	},
	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
	  0, 0
	},
	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
	  0, 0
	},
	/* TCA (texture cache arbiter) FIFOs */
	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
	  0, 0
	},
	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
	  0, 0
	},
	/* TCC (L2 texture cache) memories */
	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
	},
	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
	},
	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
	},
	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
	},
	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
	},
	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
	  0, 0
	},
	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
	  0, 0
	},
	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
	  0, 0
	},
	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
	  0, 0
	},
	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
	  0, 0
	},
	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
	  0, 0
	},
	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
	  0, 0
	},
	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
	  0, 0
	},
	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
	  0, 0
	},
	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
	  0, 0
	},
	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
	  0, 0
	},
	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
	  0, 0
	},
	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
	  0, 0
	},
	/* TCI (texture cache interface) */
	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
	  0, 0
	},
	/* TCP (L1 texture cache) memories */
	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
	},
	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
	},
	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
	  0, 0
	},
	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
	  0, 0
	},
	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
	  0, 0
	},
	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
	},
	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
	},
	/* TD (texture data) FIFOs */
	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
	},
	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
	},
	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
	  0, 0
	},
	/* SQ (shader sequencer) LDS/register files */
	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
	},
	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
	},
	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
	},
	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
	},
	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
	},
	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
	},
	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
	},
	/* SQC (shader sequencer cache) per-CU and per-bank memories */
	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
	},
	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
	},
	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
	},
	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
	},
	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
	},
	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
	},
	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
	},
	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
	},
	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
	},
	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
	},
	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
	  0, 0
	},
	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
	},
	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
	},
	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
	},
	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
	},
	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
	},
	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
	  0, 0
	},
	/* GCEA (graphics efficiency arbiter) DRAM/IO/GMI path memories */
	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
	},
	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
	},
	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
	},
	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
	},
	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
	},
	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
	  0, 0
	},
	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
	},
	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
	},
	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
	},
	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
	  0, 0
	}
};
/*
 * gfx_v9_0_ras_error_inject - inject a RAS error into a GFX sub-block via PSP.
 *
 * @inject_if: opaque pointer, actually a struct ras_inject_if describing the
 *             target sub-block, error type, address and value.
 *
 * Validates that the requested sub-block exists and that both hardware and
 * driver support the requested error type, then asks the PSP RAS TA to
 * trigger the error.  grbm_idx_mutex serializes against other GRBM-index
 * users while the TA runs.
 *
 * Returns -EINVAL for an unsupported block/index, -EPERM for an unsupported
 * error type, otherwise the PSP call's return value.
 */
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if)
{
	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
	int ret;
	struct ta_ras_trigger_error_input block_info = { 0 };

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		return -EINVAL;

	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
		return -EINVAL;

	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
		return -EPERM;

	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
	      info->head.type)) {
		DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
			ras_gfx_subblocks[info->head.sub_block_index].name,
			info->head.type);
		return -EPERM;
	}

	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
	      info->head.type)) {
		DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
			ras_gfx_subblocks[info->head.sub_block_index].name,
			info->head.type);
		return -EPERM;
	}

	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
	block_info.sub_block_index =
		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
	block_info.address = info->address;
	block_info.value = info->value;

	mutex_lock(&adev->grbm_idx_mutex);
	ret = psp_ras_trigger_error(&adev->psp, &block_info);
	mutex_unlock(&adev->grbm_idx_mutex);

	return ret;
}
/* Names of the VML2 bank-cache memory instances, in the order selected by
 * mmVM_L2_MEM_ECC_INDEX; used only for EDC status log messages. */
static const char *vml2_mems[] = {
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
};
/* Names of the VML2 page-walker memory instances, in the order selected by
 * mmVM_L2_WALKER_MEM_ECC_INDEX; used only for EDC status log messages. */
static const char *vml2_walker_mems[] = {
	"UTC_VML2_CACHE_PDE0_MEM0",
	"UTC_VML2_CACHE_PDE0_MEM1",
	"UTC_VML2_CACHE_PDE1_MEM0",
	"UTC_VML2_CACHE_PDE1_MEM1",
	"UTC_VML2_CACHE_PDE2_MEM0",
	"UTC_VML2_CACHE_PDE2_MEM1",
	"UTC_VML2_RDIF_LOG_FIFO",
};
/* Names of the ATC L2 2M-page cache memory instances, in the order selected
 * by mmATC_L2_CACHE_2M_EDC_INDEX; used only for EDC status log messages. */
static const char *atc_l2_cache_2m_mems[] = {
	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
};
/* Names of the ATC L2 4K-page cache memory instances, in the order selected
 * by mmATC_L2_CACHE_4K_EDC_INDEX; used only for EDC status log messages. */
static const char *atc_l2_cache_4k_mems[] = {
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
};
/*
 * gfx_v9_0_query_utc_edc_status - collect UTC (VML2/ATCL2) EDC error counts.
 *
 * Walks each indexed EDC counter bank (VML2 bank caches, VML2 walker
 * memories, ATC L2 2M and 4K caches), accumulating SEC counts into
 * err_data->ce_count and DED counts into err_data->ue_count, logging any
 * non-zero instance.  Counters are cleared up front (index 255 selects the
 * broadcast/all view, then the CNT register is written 0) and the index
 * registers are restored to 255 on exit.
 *
 * The ATC L2 counters have no REG_GET_FIELD definitions here, so the raw
 * masks are used: bits [14:13] = SEC count, bits [16:15] = DED count
 * (presumed from the shifts used - verify against the register spec).
 * Always returns 0.
 */
static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
					 struct ras_err_data *err_data)
{
	uint32_t i;
	uint32_t data, sec_count, ded_count;

	/* reset all UTC EDC counters before reading them back */
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);

	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);

		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
		if (sec_count) {
			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
				 vml2_mems[i], sec_count);
			err_data->ce_count += sec_count;
		}

		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
		if (ded_count) {
			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
				 vml2_mems[i], ded_count);
			err_data->ue_count += ded_count;
		}
	}

	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);

		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
					  SEC_COUNT);
		if (sec_count) {
			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
				 vml2_walker_mems[i], sec_count);
			err_data->ce_count += sec_count;
		}

		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
					  DED_COUNT);
		if (ded_count) {
			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
				 vml2_walker_mems[i], ded_count);
			err_data->ue_count += ded_count;
		}
	}

	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);

		/* 2M cache reports only a SEC count (bits [14:13]) */
		sec_count = (data & 0x00006000L) >> 0xd;
		if (sec_count) {
			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
				 atc_l2_cache_2m_mems[i], sec_count);
			err_data->ce_count += sec_count;
		}
	}

	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);

		sec_count = (data & 0x00006000L) >> 0xd;
		if (sec_count) {
			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
				 atc_l2_cache_4k_mems[i], sec_count);
			err_data->ce_count += sec_count;
		}

		ded_count = (data & 0x00018000L) >> 0xf;
		if (ded_count) {
			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
				 atc_l2_cache_4k_mems[i], ded_count);
			err_data->ue_count += ded_count;
		}
	}

	/* restore the broadcast index selection */
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);

	return 0;
}
/*
 * gfx_v9_0_ras_error_count - decode one EDC counter register value.
 *
 * Scans gfx_v9_0_ras_fields for every field that lives in register @reg and
 * adds the SEC/DED counts extracted from @value to *@sec_count / *@ded_count.
 * @se_id and @inst_id are used only in the log messages.  A zero ded mask
 * (SED-only entries) naturally yields a zero ded_cnt.  Always returns 0.
 */
static int gfx_v9_0_ras_error_count(const struct soc15_reg_entry *reg,
	uint32_t se_id, uint32_t inst_id, uint32_t value,
	uint32_t *sec_count, uint32_t *ded_count)
{
	uint32_t i;
	uint32_t sec_cnt, ded_cnt;

	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
		/* only fields belonging to this register contribute */
		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
		    gfx_v9_0_ras_fields[i].seg != reg->seg ||
		    gfx_v9_0_ras_fields[i].inst != reg->inst)
			continue;

		sec_cnt = (value &
				gfx_v9_0_ras_fields[i].sec_count_mask) >>
				gfx_v9_0_ras_fields[i].sec_count_shift;
		if (sec_cnt) {
			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
				gfx_v9_0_ras_fields[i].name,
				se_id, inst_id,
				sec_cnt);
			*sec_count += sec_cnt;
		}

		ded_cnt = (value &
				gfx_v9_0_ras_fields[i].ded_count_mask) >>
				gfx_v9_0_ras_fields[i].ded_count_shift;
		if (ded_cnt) {
			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
				gfx_v9_0_ras_fields[i].name,
				se_id, inst_id,
				ded_cnt);
			*ded_count += ded_cnt;
		}
	}

	return 0;
}
/*
 * gfx_v9_0_reset_ras_error_count - clear all GFX EDC error counters.
 *
 * The EDC counter registers are read-to-clear, so reading every instance
 * (per SE / per instance via GRBM index selection, and per memory via the
 * UTC index registers) resets them.  GRBM index access is serialized with
 * grbm_idx_mutex; 0xe0000000 written to GRBM_GFX_INDEX restores broadcast
 * mode (all SE/SH/instance broadcast bits set).
 */
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
{
	int i, j, k;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		return;

	/* read back registers to clear the counters */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
			}
		}
	}
	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* explicitly zero the UTC counters through the broadcast index */
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);

	/* then read every instance to clear any residual counts */
	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
	}

	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
	}

	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
	}

	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
	}

	/* restore broadcast index selection */
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
}
/*
 * gfx_v9_0_query_ras_error_count - RAS callback: report GFX error counts.
 *
 * @ras_error_status: opaque pointer, actually a struct ras_err_data that
 *                    receives the correctable (ce) / uncorrectable (ue)
 *                    totals.
 *
 * Reads every GFX EDC counter register for every SE/instance (under
 * grbm_idx_mutex), decodes the per-field counts with
 * gfx_v9_0_ras_error_count(), then adds the UTC (VML2/ATCL2) counters via
 * gfx_v9_0_query_utc_edc_status().  GRBM indexing is restored to broadcast
 * (all 0xffffffff) before releasing the mutex.
 *
 * Returns 0 on success, -EINVAL if GFX RAS is not supported.
 */
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
	uint32_t sec_count = 0, ded_count = 0;
	uint32_t i, j, k;
	uint32_t reg_value;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		return -EINVAL;

	err_data->ue_count = 0;
	err_data->ce_count = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
				gfx_v9_0_select_se_sh(adev, j, 0, k);
				reg_value =
					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
				if (reg_value)
					gfx_v9_0_ras_error_count(&gfx_v9_0_edc_counter_regs[i],
							j, k, reg_value,
							&sec_count, &ded_count);
			}
		}
	}

	err_data->ce_count += sec_count;
	err_data->ue_count += ded_count;

	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_query_utc_edc_status(adev, err_data);

	return 0;
}
/* IP-block callback table registered with the amdgpu core for the GFX v9.0
 * block (init/fini, power management and reset entry points).
 * NOTE(review): the .name initializer was dropped by the extraction and is
 * restored per the gfx_v9_0 convention - verify against the original. */
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.name = "gfx_v9_0",
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
};
/* Ring callbacks for the single GFX (graphics) ring.
 * NOTE(review): several initializer lines (.align_mask and part of the
 * emit_frame_size sum) were dropped by the extraction and have been
 * restored from the gfx9 ring convention - verify against the original. */
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		     * the first COND_EXEC jump to the place just
		     * prior to this double SWITCH_BUFFER
		     */
		5 + /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_tmz = gfx_v9_0_ring_emit_tmz,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
};
/* Ring callbacks for the MEC compute rings. */
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};
/*
 * Ring callback table for the KIQ (Kernel Interface Queue) ring.
 * Reuses the compute rptr/wptr accessors, but unlike the compute table
 * it emits its fence via the _kiq variant, has no IB/VM-flush callbacks,
 * and additionally supports register reads through the ring (emit_rreg).
 */
6658 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6659 .type = AMDGPU_RING_TYPE_KIQ,
/* Type-3 NOP packet (count field 0x3FFF) used to pad the ring. */
6661 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6662 .support_64bit_ptrs = true,
6663 .vmhub = AMDGPU_GFXHUB_0,
6664 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6665 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6666 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
/*
 * Worst-case frame-size budget in DWs (same accounting as the compute
 * table above); each term is the maximum emit size of one callback.
 */
6668 20 + /* gfx_v9_0_ring_emit_gds_switch */
6669 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6670 5 + /* hdp invalidate */
6671 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6672 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6673 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6674 2 + /* gfx_v9_0_ring_emit_vm_flush */
6675 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6676 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6677 .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6678 .test_ring = gfx_v9_0_ring_test_ring,
6679 .insert_nop = amdgpu_ring_insert_nop,
6680 .pad_ib = amdgpu_ring_generic_pad_ib,
/* KIQ-only: register reads are serviced through the ring itself. */
6681 .emit_rreg = gfx_v9_0_ring_emit_rreg,
6682 .emit_wreg = gfx_v9_0_ring_emit_wreg,
6683 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6684 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
/*
 * Attach the per-ring-type function tables to every GFX9 ring:
 * the single KIQ ring, then all GFX rings, then all compute rings.
 */
6687 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6691 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6693 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6694 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6696 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6697 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
/* IRQ source callbacks for end-of-pipe (EOP) interrupts. */
6700 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6701 .set = gfx_v9_0_set_eop_interrupt_state,
6702 .process = gfx_v9_0_eop_irq,
/* IRQ source callbacks for privileged-register access faults. */
6705 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6706 .set = gfx_v9_0_set_priv_reg_fault_state,
6707 .process = gfx_v9_0_priv_reg_irq,
/* IRQ source callbacks for privileged-instruction faults. */
6710 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6711 .set = gfx_v9_0_set_priv_inst_fault_state,
6712 .process = gfx_v9_0_priv_inst_irq,
/*
 * IRQ source callbacks for CP ECC errors; processing is delegated to
 * the shared amdgpu_gfx handler rather than a v9-specific one.
 */
6715 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6716 .set = gfx_v9_0_set_cp_ecc_error_state,
6717 .process = amdgpu_gfx_cp_ecc_error_irq,
/*
 * Register the GFX9 interrupt sources on the device: EOP (one type per
 * CP IRQ), privileged register/instruction faults, and CP ECC errors.
 */
6721 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
/* EOP interrupts are demultiplexed per CP ring, hence AMDGPU_CP_IRQ_LAST. */
6723 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6724 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6726 adev->gfx.priv_reg_irq.num_types = 1;
6727 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6729 adev->gfx.priv_inst_irq.num_types = 1;
6730 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6732 adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
6733 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
/*
 * Select the RLC (Run List Controller) function table based on the ASIC
 * type.  NOTE(review): the case labels are not visible in this excerpt;
 * presumably all supported GFX9 ASICs map to gfx_v9_0_rlc_funcs — confirm
 * against the full switch.
 */
6736 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6738 switch (adev->asic_type) {
6745 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
/*
 * Initialize per-ASIC GDS (Global Data Share) parameters: total GDS
 * size, the maximum GDS compute wave id, and fixed GWS/OA sizes.
 * NOTE(review): the case labels of both switches are not visible in this
 * excerpt, so which ASIC gets which value cannot be confirmed from here.
 */
6752 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6754 /* init asci gds info */
6755 switch (adev->asic_type) {
6759 adev->gds.gds_size = 0x10000;
6763 adev->gds.gds_size = 0x1000;
6766 adev->gds.gds_size = 0x10000;
6770 switch (adev->asic_type) {
6773 adev->gds.gds_compute_max_wave_id = 0x7ff;
6776 adev->gds.gds_compute_max_wave_id = 0x27f;
/* Raven: the max wave id differs between Raven2 (rev >= 0x8) and Raven1. */
6779 if (adev->rev_id >= 0x8)
6780 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6782 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6785 adev->gds.gds_compute_max_wave_id = 0xfff;
6788 /* this really depends on the chip */
6789 adev->gds.gds_compute_max_wave_id = 0x7ff;
/* GWS and OA sizes are the same across all GFX9 ASICs. */
6793 adev->gds.gws_size = 64;
6794 adev->gds.oa_size = 16;
/*
 * Program the user-requested inactive-CU bitmap into
 * GC_USER_SHADER_ARRAY_CONFIG.  The bitmap is shifted into the
 * INACTIVE_CUS field and masked to the field width before the write.
 * NOTE(review): presumably applies to the SE/SH selected via GRBM by the
 * caller (see gfx_v9_0_get_cu_info) — confirm against the full file.
 */
6797 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6805 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6806 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6808 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
/*
 * Return the bitmap of active CUs for the currently selected SE/SH.
 * A CU is inactive if either the hardware fuse register
 * (CC_GC_SHADER_ARRAY_CONFIG) or the user override register
 * (GC_USER_SHADER_ARRAY_CONFIG) marks it inactive; the OR of both
 * inactive fields is inverted and clipped to max_cu_per_sh bits.
 */
6811 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6815 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6816 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6818 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6819 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6821 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
/* Invert inactive -> active, keep only valid CU bits for this SH. */
6823 return (~data) & mask;
/*
 * Populate @cu_info with the compute-unit topology of the chip: per-SE/SH
 * active-CU bitmaps, the total active CU count, the always-on (AO) CU
 * mask, and SIMDs per CU.  Walks every SE/SH under grbm_idx_mutex,
 * selecting each in turn, applying any user CU-disable masks, and reading
 * back the resulting active bitmap.
 * Returns 0 on success; errors for NULL arguments or unsupported
 * SE*SH layouts (the visible early-out conditions — exact return codes
 * are on lines elided from this excerpt).
 */
6826 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6827 struct amdgpu_cu_info *cu_info)
6829 int i, j, k, counter, active_cu_number = 0;
6830 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
/* One user disable mask per SE/SH slot; 4*4 covers all gfx9 layouts. */
6831 unsigned disable_masks[4 * 4];
6833 if (!adev || !cu_info)
6837 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
6839 if (adev->gfx.config.max_shader_engines *
6840 adev->gfx.config.max_sh_per_se > 16)
/* Parse the amdgpu.disable_cu module option into disable_masks. */
6843 amdgpu_gfx_parse_disable_cu(disable_masks,
6844 adev->gfx.config.max_shader_engines,
6845 adev->gfx.config.max_sh_per_se);
/* GRBM SE/SH selection is global state; serialize the whole walk. */
6847 mutex_lock(&adev->grbm_idx_mutex);
6848 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6849 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6853 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6854 gfx_v9_0_set_user_cu_inactive_bitmap(
6855 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6856 bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6859 * The bitmap(and ao_cu_bitmap) in cu_info structure is
6860 * 4x4 size array, and it's usually suitable for Vega
6861 * ASICs which has 4*2 SE/SH layout.
6862 * But for Arcturus, SE/SH layout is changed to 8*1.
6863 * To mostly reduce the impact, we make it compatible
6864 * with current bitmap array as below:
6865 * SE4,SH0 --> bitmap[0][1]
6866 * SE5,SH0 --> bitmap[1][1]
6867 * SE6,SH0 --> bitmap[2][1]
6868 * SE7,SH0 --> bitmap[3][1]
6870 cu_info->bitmap[i % 4][j + i / 4] = bitmap;
/* Count active CUs in this SH; AO bookkeeping lines are elided here. */
6872 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
6873 if (bitmap & mask) {
6874 if (counter < adev->gfx.config.max_cu_per_sh)
6880 active_cu_number += counter;
/* Pack this SH's AO bitmap into its 8-bit slot of the global AO mask. */
6882 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6883 cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
/* Restore broadcast (all SE/SH) selection before releasing the lock. */
6886 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6887 mutex_unlock(&adev->grbm_idx_mutex);
6889 cu_info->number = active_cu_number;
6890 cu_info->ao_cu_mask = ao_cu_mask;
6891 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6896 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6898 .type = AMD_IP_BLOCK_TYPE_GFX,
6902 .funcs = &gfx_v9_0_ip_funcs,