/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vce/vce_4_0_offset.h"
#include "vce/vce_4_0_default.h"
#include "vce/vce_4_0_sh_mask.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"

#include "ivsrcid/vce/irqsrcs_vce_4_0.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02

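/* Sizes of the three VCPU memory regions programmed into the
 * VCE_VCPU_CACHE_OFFSET/SIZE register pairs below: firmware image,
 * stack and per-handle data area.
 */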
#define VCE_V4_0_FW_SIZE        (384 * 1024)
#define VCE_V4_0_STACK_SIZE     (64 * 1024)
#define VCE_V4_0_DATA_SIZE      ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->me == 0)
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
        else if (ring->me == 1)
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
        else
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell)
                return adev->wb.wb[ring->wptr_offs];

        if (ring->me == 0)
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
        else if (ring->me == 1)
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
        else
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell) {
                /* XXX check if swapping is necessary on BE */
                adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
                return;
        }

        if (ring->me == 0)
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
                        lower_32_bits(ring->wptr));
        else if (ring->me == 1)
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
                        lower_32_bits(ring->wptr));
        else
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
                        lower_32_bits(ring->wptr));
}

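/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to come up
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS until the VCPU reports the firmware as loaded,
 * soft-resetting the ECPU between retries.
 *
 * Returns 0 on success, -ETIMEDOUT if the firmware never reports in.
 */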
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
        int i, j;

        for (i = 0; i < 10; ++i) {
                for (j = 0; j < 100; ++j) {
                        uint32_t status =
                                RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

                        if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
                                return 0;
                        mdelay(10);
                }

                DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                                VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
                mdelay(10);
                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
                mdelay(10);
        }

        return -ETIMEDOUT;
}

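/**
 * vce_v4_0_mmsch_start - kick off the MM scheduler (SR-IOV only)
 *
 * @adev: amdgpu_device pointer
 * @table: memory descriptor holding the init table
 *
 * Hand the init table built by vce_v4_0_sriov_start() to the MMSCH
 * firmware and wait for it to acknowledge through the mailbox response
 * register.  The magic request/response values are part of the MMSCH
 * v1.0 mailbox protocol.
 */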
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
                                struct amdgpu_mm_table *table)
{
        uint32_t data = 0, loop;
        uint64_t addr = table->gpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
        uint32_t size;

        size = header->header_size + header->vce_table_size + header->uvd_table_size;

        /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

        /* 2, update vmid of descriptor */
        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
        data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
        data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

        /* 3, notify mmsch about the size of this descriptor */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

        /* 4, set resp to zero */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

        WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
        adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
        adev->vce.ring[0].wptr = 0;
        adev->vce.ring[0].wptr_old = 0;

        /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
        loop = 1000;
        while ((data & 0x10000002) != 0x10000002) {
                udelay(10);
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
                loop--;
                if (!loop)
                        break;
        }

        if (!loop) {
                dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
                return -EBUSY;
        }

        return 0;
}

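/**
 * vce_v4_0_sriov_start - start VCE under SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Guests may not program VCE registers directly, so build an MMSCH
 * init table with the equivalent ring, memory controller and VCPU
 * setup and let the MMSCH firmware apply it on our behalf.
 */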
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        uint32_t offset, size;
        uint32_t table_size = 0;
        struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
        struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
        struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
        struct mmsch_v1_0_cmd_end end = { { 0 } };
        uint32_t *init_table = adev->virt.mm_table.cpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

        direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
        direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
        direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
        end.cmd_header.command_type = MMSCH_COMMAND__END;

        if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
                header->version = MMSCH_VERSION;
                header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

                if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
                        header->vce_table_offset = header->header_size;
                else
                        header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

                init_table += header->vce_table_offset;

                ring = &adev->vce.ring[0];
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
                                            lower_32_bits(ring->gpu_addr));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
                                            upper_32_bits(ring->gpu_addr));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
                                            ring->ring_size / 4);
                /* begin of MC_RESUME */
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

                offset = AMDGPU_VCE_FIRMWARE_OFFSET;
                if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                        uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
                        uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
                        uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                                                (tmr_mc_addr >> 40) & 0xff);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
                } else {
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                                                adev->vce.gpu_addr >> 8);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                                                (adev->vce.gpu_addr >> 40) & 0xff);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
                                                offset & ~0x0f000000);
                }
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
                                                adev->vce.gpu_addr >> 8);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
                                                (adev->vce.gpu_addr >> 40) & 0xff);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
                                                adev->vce.gpu_addr >> 8);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
                                                (adev->vce.gpu_addr >> 40) & 0xff);

                size = VCE_V4_0_FW_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

                offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
                size = VCE_V4_0_STACK_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
                                        (offset & ~0x0f000000) | (1 << 24));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

                offset += size;
                size = VCE_V4_0_DATA_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
                                        (offset & ~0x0f000000) | (2 << 24));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                                                   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
                                                   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

                /* end of MC_RESUME */
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                                   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
                                                   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                                                   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

                MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

                /* clear BUSY flag */
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                                   ~VCE_STATUS__JOB_BUSY_MASK, 0);

                /* add end packet */
                memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
                table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
                header->vce_table_size = table_size;
        }

        return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        int r;

        ring = &adev->vce.ring[0];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

        ring = &adev->vce.ring[1];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

        ring = &adev->vce.ring[2];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

        vce_v4_0_mc_resume(adev);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
                        ~VCE_STATUS__JOB_BUSY_MASK);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
        mdelay(100);

        r = vce_v4_0_firmware_loaded(adev);

        /* clear BUSY flag */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

        if (r) {
                DRM_ERROR("VCE not responding, giving up!!!\n");
                return r;
        }

        return 0;
}

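/**
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU, hold the ECPU in soft reset and clear VCE_STATUS.
 */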
static int vce_v4_0_stop(struct amdgpu_device *adev)
{
        /* Disable VCPU */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

        /* hold on ECPU */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                        VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

        /* clear VCE_STATUS */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

        /* Set Clock-Gating off */
        /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
         *      vce_v4_0_set_vce_sw_clock_gating(adev, false);
         */

        return 0;
}

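/**
 * vce_v4_0_early_init - set IP block callbacks
 *
 * @handle: amdgpu_device pointer
 *
 * Pick the number of rings (one under SR-IOV, three otherwise) and
 * install the ring and interrupt functions.
 */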
static int vce_v4_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SR-IOV */
                adev->vce.num_rings = 1;
        else
                adev->vce.num_rings = 3;

        vce_v4_0_set_ring_funcs(adev);
        vce_v4_0_set_irq_funcs(adev);

        return 0;
}

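/**
 * vce_v4_0_sw_init - software init
 *
 * @handle: amdgpu_device pointer
 *
 * Register the interrupt source, size and allocate the VCPU buffer
 * (plus a shadow copy when the PSP loads the firmware), then set up
 * the rings and the SR-IOV MM table.
 */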
static int vce_v4_0_sw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct amdgpu_ring *ring;
        unsigned size;
        int r, i;

        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
        if (r)
                return r;

        size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
                size += VCE_V4_0_FW_SIZE;

        r = amdgpu_vce_sw_init(adev, size);
        if (r)
                return r;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                const struct common_firmware_header *hdr;
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

                adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
                if (!adev->vce.saved_bo)
                        return -ENOMEM;

                hdr = (const struct common_firmware_header *)adev->vce.fw->data;
                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
                DRM_INFO("PSP loading VCE firmware\n");
        } else {
                r = amdgpu_vce_resume(adev);
                if (r)
                        return r;
        }

        for (i = 0; i < adev->vce.num_rings; i++) {
                ring = &adev->vce.ring[i];
                sprintf(ring->name, "vce%d", i);
                if (amdgpu_sriov_vf(adev)) {
                        /* DOORBELL only works under SRIOV */
                        ring->use_doorbell = true;

                        /* currently only the first encoding ring is used
                         * under SR-IOV, so park the unused rings on a
                         * spare doorbell location.
                         */
                        if (i == 0)
                                ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
                        else
                                ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
                }
                r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
                if (r)
                        return r;
        }

        r = amdgpu_vce_entity_init(adev);
        if (r)
                return r;

        r = amdgpu_virt_alloc_mm_table(adev);
        if (r)
                return r;

        return r;
}

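/**
 * vce_v4_0_sw_fini - software teardown, the inverse of vce_v4_0_sw_init
 *
 * @handle: amdgpu_device pointer
 */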
static int vce_v4_0_sw_fini(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* free MM table */
        amdgpu_virt_free_mm_table(adev);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                kvfree(adev->vce.saved_bo);
                adev->vce.saved_bo = NULL;
        }

        r = amdgpu_vce_suspend(adev);
        if (r)
                return r;

        return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(void *handle)
{
        int r, i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev))
                r = vce_v4_0_sriov_start(adev);
        else
                r = vce_v4_0_start(adev);
        if (r)
                return r;

        for (i = 0; i < adev->vce.num_rings; i++) {
                r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
                if (r)
                        return r;
        }

        DRM_INFO("VCE initialized successfully.\n");

        return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i;

        if (!amdgpu_sriov_vf(adev)) {
                /* vce_v4_0_wait_for_idle(handle); */
                vce_v4_0_stop(adev);
        } else {
                /* full access mode, so don't touch any VCE register */
                DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
        }

        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].sched.ready = false;

        return 0;
}

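/**
 * vce_v4_0_suspend - suspend VCE
 *
 * @handle: amdgpu_device pointer
 *
 * When the firmware was loaded by the PSP, the VCPU buffer contents
 * are first saved to system memory so that vce_v4_0_resume() can
 * restore them after the hardware has been powered down.
 */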
static int vce_v4_0_suspend(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        if (adev->vce.vcpu_bo == NULL)
                return 0;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
                void *ptr = adev->vce.cpu_addr;

                memcpy_fromio(adev->vce.saved_bo, ptr, size);
        }

        r = vce_v4_0_hw_fini(adev);
        if (r)
                return r;

        return amdgpu_vce_suspend(adev);
}

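/**
 * vce_v4_0_resume - resume VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Restore the VCPU buffer saved by vce_v4_0_suspend() (or re-upload
 * the firmware when it is not PSP loaded) and restart the block.
 */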
static int vce_v4_0_resume(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        if (adev->vce.vcpu_bo == NULL)
                return -EINVAL;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
                void *ptr = adev->vce.cpu_addr;

                memcpy_toio(ptr, adev->vce.saved_bo, size);
        } else {
                r = amdgpu_vce_resume(adev);
                if (r)
                        return r;
        }

        return vce_v4_0_hw_init(adev);
}

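/**
 * vce_v4_0_mc_resume - program the memory controller interface
 *
 * @adev: amdgpu_device pointer
 *
 * Disable clock gating, set up the LMI and point the three VCPU cache
 * regions (firmware, stack, data) at their backing memory; the
 * firmware region lives in the PSP TMR when the PSP did the load.
 */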
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
        uint32_t offset, size;
        uint64_t tmr_mc_addr;

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

        offset = AMDGPU_VCE_FIRMWARE_OFFSET;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
                              adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                        (tmr_mc_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                        (tmr_mc_addr >> 40) & 0xff);
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
        } else {
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                        (adev->vce.gpu_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                        (adev->vce.gpu_addr >> 40) & 0xff);
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
        }

        size = VCE_V4_0_FW_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
        offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
        size = VCE_V4_0_STACK_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
        offset += size;
        size = VCE_V4_0_DATA_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                        VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
                        ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        /* needed for driver unload */
        return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 mask = 0;

        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

        return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
        unsigned i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        for (i = 0; i < adev->usec_timeout; i++)
                if (vce_v4_0_is_idle(handle))
                        return 0;

        return -ETIMEDOUT;
}

#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK   0x00000008L   /* AUTO_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK    0x00000010L   /* RB0_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK    0x00000020L   /* RB1_BUSY */
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
                                     VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;

        /* According to the VCE team, we should use VCE_STATUS instead of
         * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
         * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
         * instance's registers are accessed
         * (0 for 1st instance, 0x10 for 2nd instance).
         *
         * VCE_STATUS
         * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
         * |----+----+-----------+----+----+----+----------+---------+----|
         * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
         *
         * The VCE team suggests using bits 3 to 6 for the busy status check.
         */
        mutex_lock(&adev->grbm_idx_mutex);
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        if (srbm_soft_reset) {
                adev->vce.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->vce.srbm_soft_reset = 0;
                return false;
        }
}

static int vce_v4_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;

        if (!adev->vce.srbm_soft_reset)
                return 0;
        srbm_soft_reset = adev->vce.srbm_soft_reset;

        if (srbm_soft_reset) {
                u32 tmp;

                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                /* Wait a little for things to settle down */
                udelay(50);
        }

        return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
        u32 tmp, data;

        tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
        if (override)
                data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
        else
                data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

        if (tmp != data)
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
                                             bool gated)
{
        u32 data;

        /* Set Override to disable Clock Gating */
        vce_v4_0_override_vce_clock_gating(adev, true);

        /* This function enables MGCG which is controlled by firmware.
         * With the clocks in the gated state the core is still
         * accessible but the firmware will throttle the clocks on the
         * fly as necessary.
         */
        if (gated) {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data |= 0x1ff;
                data &= ~0xef0000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0x3ff000;
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x2;
                data &= ~0x00010000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data |= 0x37f;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                        0x8;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        } else {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data &= ~0x80010;
                data |= 0xe70008;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x10000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                          0x8);
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        }
        vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
        u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

        if (enable)
                tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
        else
                tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

        WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = (state == AMD_CG_STATE_GATE);
        int i;

        if ((adev->asic_type == CHIP_POLARIS10) ||
                (adev->asic_type == CHIP_TONGA) ||
                (adev->asic_type == CHIP_FIJI))
                vce_v4_0_set_bypass_mode(adev, enable);

        if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
                return 0;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < 2; i++) {
                /* Program VCE Instance 0 or 1 if not harvested */
                if (adev->vce.harvest_config & (1 << i))
                        continue;

                WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

                if (enable) {
                        /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
                        uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

                        /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
                        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
                }

                vce_v4_0_set_vce_sw_clock_gating(adev, enable);
        }

        WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}
#endif

static int vce_v4_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
        /* This doesn't actually powergate the VCE block.
         * That's done in the dpm code via the SMC.  This
         * just re-inits the block as necessary.  The actual
         * gating still happens in the dpm code.  We should
         * revisit this when there is a cleaner line between
         * the smc and the hw blocks.
         */
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (state == AMD_PG_STATE_GATE)
                return vce_v4_0_stop(adev);
        else
                return vce_v4_0_start(adev);
}

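/**
 * vce_v4_0_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @job: job to retrieve vmid from
 * @ib: indirect buffer to execute
 * @flags: unused
 *
 * Write ring commands to execute the indirect buffer.
 */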
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
                                        struct amdgpu_ib *ib, uint32_t flags)
{
        unsigned vmid = AMDGPU_JOB_GET_VMID(job);

        amdgpu_ring_write(ring, VCE_CMD_IB_VM);
        amdgpu_ring_write(ring, vmid);
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, ib->length_dw);
}

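/**
 * vce_v4_0_ring_emit_fence - add a fence command to the ring
 *
 * @ring: amdgpu_ring pointer
 * @addr: GPU address to write the sequence number to
 * @seq: sequence number to write
 * @flags: fence flags; 64 bit sequence numbers are not supported
 *
 * Write a fence and a trap command to the ring.
 */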
static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
                        u64 seq, unsigned flags)
{
        WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        amdgpu_ring_write(ring, VCE_CMD_FENCE);
        amdgpu_ring_write(ring, addr);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, seq);
        amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
                                   uint32_t val, uint32_t mask)
{
        amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
        amdgpu_ring_write(ring, reg << 2);
        amdgpu_ring_write(ring, mask);
        amdgpu_ring_write(ring, val);
}

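/**
 * vce_v4_0_emit_vm_flush - flush the VM TLB from the VCE ring
 *
 * @ring: amdgpu_ring pointer
 * @vmid: VM id to flush for
 * @pd_addr: page directory base address
 *
 * Emit the GMC TLB flush and then wait until the page table base
 * register for @vmid reflects the new address.
 */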
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
                                   unsigned int vmid, uint64_t pd_addr)
{
        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

        pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

        /* wait for reg writes */
        vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
                               lower_32_bits(pd_addr), 0xffffffff);
}

static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
                               uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
        amdgpu_ring_write(ring, reg << 2);
        amdgpu_ring_write(ring, val);
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *source,
                                        unsigned type,
                                        enum amdgpu_interrupt_state state)
{
        uint32_t val = 0;

        if (!amdgpu_sriov_vf(adev)) {
                if (state == AMDGPU_IRQ_STATE_ENABLE)
                        val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
                                ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
        }
        return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
{
        DRM_DEBUG("IH: VCE\n");

        switch (entry->src_data[0]) {
        case 0:
        case 1:
        case 2:
                amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
                break;
        default:
                DRM_ERROR("Unhandled interrupt: %d %d\n",
                          entry->src_id, entry->src_data[0]);
                break;
        }

        return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
        .name = "vce_v4_0",
        .early_init = vce_v4_0_early_init,
        .late_init = NULL,
        .sw_init = vce_v4_0_sw_init,
        .sw_fini = vce_v4_0_sw_fini,
        .hw_init = vce_v4_0_hw_init,
        .hw_fini = vce_v4_0_hw_fini,
        .suspend = vce_v4_0_suspend,
        .resume = vce_v4_0_resume,
        .is_idle = NULL /* vce_v4_0_is_idle */,
        .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
        .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
        .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
        .soft_reset = NULL /* vce_v4_0_soft_reset */,
        .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
        .set_clockgating_state = vce_v4_0_set_clockgating_state,
        .set_powergating_state = vce_v4_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCE,
        .align_mask = 0x3f,
        .nop = VCE_CMD_NO_OP,
        .support_64bit_ptrs = false,
        .no_user_fence = true,
        .vmhub = AMDGPU_MMHUB,
        .get_rptr = vce_v4_0_ring_get_rptr,
        .get_wptr = vce_v4_0_ring_get_wptr,
        .set_wptr = vce_v4_0_ring_set_wptr,
        .parse_cs = amdgpu_vce_ring_parse_cs_vm,
        .emit_frame_size =
                SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
                4 + /* vce_v4_0_emit_vm_flush */
                5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
                1, /* vce_v4_0_ring_insert_end */
        .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
        .emit_ib = vce_v4_0_ring_emit_ib,
        .emit_vm_flush = vce_v4_0_emit_vm_flush,
        .emit_fence = vce_v4_0_ring_emit_fence,
        .test_ring = amdgpu_vce_ring_test_ring,
        .test_ib = amdgpu_vce_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .insert_end = vce_v4_0_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vce_ring_begin_use,
        .end_use = amdgpu_vce_ring_end_use,
        .emit_wreg = vce_v4_0_emit_wreg,
        .emit_reg_wait = vce_v4_0_emit_reg_wait,
        .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
        int i;

        for (i = 0; i < adev->vce.num_rings; i++) {
                adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
                adev->vce.ring[i].me = i;
        }
        DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
        .set = vce_v4_0_set_interrupt_state,
        .process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->vce.irq.num_types = 1;
        adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 4,
        .minor = 0,
        .rev = 0,
        .funcs = &vce_v4_0_ip_funcs,
};