Linux-libre 5.7.3-gnu
[librecmc/linux-libre.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_vce.c
1 /*
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  * Authors: Christian König <christian.koenig@amd.com>
26  */
27
28 #include <linux/firmware.h>
29 #include <linux/module.h>
30
31 #include <drm/drm.h>
32
33 #include "amdgpu.h"
34 #include "amdgpu_pm.h"
35 #include "amdgpu_vce.h"
36 #include "cikd.h"
37
38 /* 1 second timeout */
39 #define VCE_IDLE_TIMEOUT        msecs_to_jiffies(1000)
40
41 /* Firmware Names */
42 #ifdef CONFIG_DRM_AMDGPU_CIK
43 #define FIRMWARE_BONAIRE        "/*(DEBLOBBED)*/"
44 #define FIRMWARE_KABINI "/*(DEBLOBBED)*/"
45 #define FIRMWARE_KAVERI "/*(DEBLOBBED)*/"
46 #define FIRMWARE_HAWAII "/*(DEBLOBBED)*/"
47 #define FIRMWARE_MULLINS        "/*(DEBLOBBED)*/"
48 #endif
49 #define FIRMWARE_TONGA          "/*(DEBLOBBED)*/"
50 #define FIRMWARE_CARRIZO        "/*(DEBLOBBED)*/"
51 #define FIRMWARE_FIJI           "/*(DEBLOBBED)*/"
52 #define FIRMWARE_STONEY         "/*(DEBLOBBED)*/"
53 #define FIRMWARE_POLARIS10      "/*(DEBLOBBED)*/"
54 #define FIRMWARE_POLARIS11      "/*(DEBLOBBED)*/"
55 #define FIRMWARE_POLARIS12      "/*(DEBLOBBED)*/"
56 #define FIRMWARE_VEGAM          "/*(DEBLOBBED)*/"
57
58 #define FIRMWARE_VEGA10         "/*(DEBLOBBED)*/"
59 #define FIRMWARE_VEGA12         "/*(DEBLOBBED)*/"
60 #define FIRMWARE_VEGA20         "/*(DEBLOBBED)*/"
61
62 #ifdef CONFIG_DRM_AMDGPU_CIK
63 /*(DEBLOBBED)*/
64 #endif
65 /*(DEBLOBBED)*/
66
67 static void amdgpu_vce_idle_work_handler(struct work_struct *work);
68 static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
69                                      struct amdgpu_bo *bo,
70                                      struct dma_fence **fence);
71 static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
72                                       bool direct, struct dma_fence **fence);
73
74 /**
 * amdgpu_vce_sw_init - allocate memory, load vce firmware
 *
 * @adev: amdgpu_device pointer
 * @size: size of the firmware BO to allocate
78  *
79  * First step to get VCE online, allocate memory and load the firmware
80  */
81 int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
82 {
83         const char *fw_name;
84         const struct common_firmware_header *hdr;
85         unsigned ucode_version, version_major, version_minor, binary_id;
86         int i, r;
87
88         switch (adev->asic_type) {
89 #ifdef CONFIG_DRM_AMDGPU_CIK
90         case CHIP_BONAIRE:
91                 fw_name = FIRMWARE_BONAIRE;
92                 break;
93         case CHIP_KAVERI:
94                 fw_name = FIRMWARE_KAVERI;
95                 break;
96         case CHIP_KABINI:
97                 fw_name = FIRMWARE_KABINI;
98                 break;
99         case CHIP_HAWAII:
100                 fw_name = FIRMWARE_HAWAII;
101                 break;
102         case CHIP_MULLINS:
103                 fw_name = FIRMWARE_MULLINS;
104                 break;
105 #endif
106         case CHIP_TONGA:
107                 fw_name = FIRMWARE_TONGA;
108                 break;
109         case CHIP_CARRIZO:
110                 fw_name = FIRMWARE_CARRIZO;
111                 break;
112         case CHIP_FIJI:
113                 fw_name = FIRMWARE_FIJI;
114                 break;
115         case CHIP_STONEY:
116                 fw_name = FIRMWARE_STONEY;
117                 break;
118         case CHIP_POLARIS10:
119                 fw_name = FIRMWARE_POLARIS10;
120                 break;
121         case CHIP_POLARIS11:
122                 fw_name = FIRMWARE_POLARIS11;
123                 break;
124         case CHIP_POLARIS12:
125                 fw_name = FIRMWARE_POLARIS12;
126                 break;
127         case CHIP_VEGAM:
128                 fw_name = FIRMWARE_VEGAM;
129                 break;
130         case CHIP_VEGA10:
131                 fw_name = FIRMWARE_VEGA10;
132                 break;
133         case CHIP_VEGA12:
134                 fw_name = FIRMWARE_VEGA12;
135                 break;
136         case CHIP_VEGA20:
137                 fw_name = FIRMWARE_VEGA20;
138                 break;
139
140         default:
141                 return -EINVAL;
142         }
143
144         r = reject_firmware(&adev->vce.fw, fw_name, adev->dev);
145         if (r) {
146                 dev_err(adev->dev, "amdgpu_vce: Can't load firmware \"%s\"\n",
147                         fw_name);
148                 return r;
149         }
150
151         r = amdgpu_ucode_validate(adev->vce.fw);
152         if (r) {
153                 dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n",
154                         fw_name);
155                 release_firmware(adev->vce.fw);
156                 adev->vce.fw = NULL;
157                 return r;
158         }
159
160         hdr = (const struct common_firmware_header *)adev->vce.fw->data;
161
162         ucode_version = le32_to_cpu(hdr->ucode_version);
163         version_major = (ucode_version >> 20) & 0xfff;
164         version_minor = (ucode_version >> 8) & 0xfff;
165         binary_id = ucode_version & 0xff;
166         DRM_INFO("Found VCE firmware Version: %hhd.%hhd Binary ID: %hhd\n",
167                 version_major, version_minor, binary_id);
168         adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
169                                 (binary_id << 8));
170
171         r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
172                                     AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo,
173                                     &adev->vce.gpu_addr, &adev->vce.cpu_addr);
174         if (r) {
175                 dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
176                 return r;
177         }
178
179         for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
180                 atomic_set(&adev->vce.handles[i], 0);
181                 adev->vce.filp[i] = NULL;
182         }
183
184         INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);
185         mutex_init(&adev->vce.idle_mutex);
186
187         return 0;
188 }
189
190 /**
 * amdgpu_vce_sw_fini - free memory
192  *
193  * @adev: amdgpu_device pointer
194  *
195  * Last step on VCE teardown, free firmware memory
196  */
197 int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
198 {
199         unsigned i;
200
201         if (adev->vce.vcpu_bo == NULL)
202                 return 0;
203
204         cancel_delayed_work_sync(&adev->vce.idle_work);
205         drm_sched_entity_destroy(&adev->vce.entity);
206
207         amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
208                 (void **)&adev->vce.cpu_addr);
209
210         for (i = 0; i < adev->vce.num_rings; i++)
211                 amdgpu_ring_fini(&adev->vce.ring[i]);
212
213         release_firmware(adev->vce.fw);
214         mutex_destroy(&adev->vce.idle_mutex);
215
216         return 0;
217 }
218
219 /**
220  * amdgpu_vce_entity_init - init entity
221  *
222  * @adev: amdgpu_device pointer
223  *
224  */
225 int amdgpu_vce_entity_init(struct amdgpu_device *adev)
226 {
227         struct amdgpu_ring *ring;
228         struct drm_gpu_scheduler *sched;
229         int r;
230
231         ring = &adev->vce.ring[0];
232         sched = &ring->sched;
233         r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
234                                   &sched, 1, NULL);
235         if (r != 0) {
236                 DRM_ERROR("Failed setting up VCE run queue.\n");
237                 return r;
238         }
239
240         return 0;
241 }
242
243 /**
244  * amdgpu_vce_suspend - unpin VCE fw memory
245  *
246  * @adev: amdgpu_device pointer
247  *
248  */
249 int amdgpu_vce_suspend(struct amdgpu_device *adev)
250 {
251         int i;
252
253         cancel_delayed_work_sync(&adev->vce.idle_work);
254
255         if (adev->vce.vcpu_bo == NULL)
256                 return 0;
257
258         for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
259                 if (atomic_read(&adev->vce.handles[i]))
260                         break;
261
262         if (i == AMDGPU_MAX_VCE_HANDLES)
263                 return 0;
264
265         /* TODO: suspending running encoding sessions isn't supported */
266         return -EINVAL;
267 }
268
269 /**
270  * amdgpu_vce_resume - pin VCE fw memory
271  *
272  * @adev: amdgpu_device pointer
273  *
274  */
275 int amdgpu_vce_resume(struct amdgpu_device *adev)
276 {
277         void *cpu_addr;
278         const struct common_firmware_header *hdr;
279         unsigned offset;
280         int r;
281
282         if (adev->vce.vcpu_bo == NULL)
283                 return -EINVAL;
284
285         r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
286         if (r) {
287                 dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
288                 return r;
289         }
290
291         r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);
292         if (r) {
293                 amdgpu_bo_unreserve(adev->vce.vcpu_bo);
294                 dev_err(adev->dev, "(%d) VCE map failed\n", r);
295                 return r;
296         }
297
298         hdr = (const struct common_firmware_header *)adev->vce.fw->data;
299         offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
300         memcpy_toio(cpu_addr, adev->vce.fw->data + offset,
301                     adev->vce.fw->size - offset);
302
303         amdgpu_bo_kunmap(adev->vce.vcpu_bo);
304
305         amdgpu_bo_unreserve(adev->vce.vcpu_bo);
306
307         return 0;
308 }
309
310 /**
311  * amdgpu_vce_idle_work_handler - power off VCE
312  *
313  * @work: pointer to work structure
314  *
 * power off VCE when it's not used any more
316  */
317 static void amdgpu_vce_idle_work_handler(struct work_struct *work)
318 {
319         struct amdgpu_device *adev =
320                 container_of(work, struct amdgpu_device, vce.idle_work.work);
321         unsigned i, count = 0;
322
323         for (i = 0; i < adev->vce.num_rings; i++)
324                 count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);
325
326         if (count == 0) {
327                 if (adev->pm.dpm_enabled) {
328                         amdgpu_dpm_enable_vce(adev, false);
329                 } else {
330                         amdgpu_asic_set_vce_clocks(adev, 0, 0);
331                         amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
332                                                                AMD_PG_STATE_GATE);
333                         amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
334                                                                AMD_CG_STATE_GATE);
335                 }
336         } else {
337                 schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT);
338         }
339 }
340
341 /**
342  * amdgpu_vce_ring_begin_use - power up VCE
343  *
344  * @ring: amdgpu ring
345  *
 * Make sure VCE is powered up when we want to use it
347  */
348 void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
349 {
350         struct amdgpu_device *adev = ring->adev;
351         bool set_clocks;
352
353         if (amdgpu_sriov_vf(adev))
354                 return;
355
356         mutex_lock(&adev->vce.idle_mutex);
357         set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
358         if (set_clocks) {
359                 if (adev->pm.dpm_enabled) {
360                         amdgpu_dpm_enable_vce(adev, true);
361                 } else {
362                         amdgpu_asic_set_vce_clocks(adev, 53300, 40000);
363                         amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
364                                                                AMD_CG_STATE_UNGATE);
365                         amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
366                                                                AMD_PG_STATE_UNGATE);
367
368                 }
369         }
370         mutex_unlock(&adev->vce.idle_mutex);
371 }
372
373 /**
374  * amdgpu_vce_ring_end_use - power VCE down
375  *
376  * @ring: amdgpu ring
377  *
378  * Schedule work to power VCE down again
379  */
380 void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
381 {
382         if (!amdgpu_sriov_vf(ring->adev))
383                 schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
384 }
385
386 /**
387  * amdgpu_vce_free_handles - free still open VCE handles
388  *
389  * @adev: amdgpu_device pointer
390  * @filp: drm file pointer
391  *
392  * Close all VCE handles still open by this file pointer
393  */
394 void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
395 {
396         struct amdgpu_ring *ring = &adev->vce.ring[0];
397         int i, r;
398         for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
399                 uint32_t handle = atomic_read(&adev->vce.handles[i]);
400
401                 if (!handle || adev->vce.filp[i] != filp)
402                         continue;
403
404                 r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL);
405                 if (r)
406                         DRM_ERROR("Error destroying VCE handle (%d)!\n", r);
407
408                 adev->vce.filp[i] = NULL;
409                 atomic_set(&adev->vce.handles[i], 0);
410         }
411 }
412
413 /**
414  * amdgpu_vce_get_create_msg - generate a VCE create msg
415  *
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @bo: buffer object whose GPU address is used as feedback buffer
 * @fence: optional fence to return
420  *
421  * Open up a stream for HW test
422  */
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
				     struct amdgpu_bo *bo,
				     struct dma_fence **fence)
{
	/* fixed-size IB; unused tail dwords are zeroed below */
	const unsigned ib_size_dw = 1024;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];

	/* the BO's GPU address is handed to the firmware as feedback buffer */
	addr = amdgpu_bo_gpu_offset(bo);

	/* stitch together an VCE create msg */
	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
	ib->ptr[ib->length_dw++] = handle;

	/* firmware major version >= 52 uses a longer create command */
	if ((ring->adev->vce.fw_version >> 24) >= 52)
		ib->ptr[ib->length_dw++] = 0x00000040; /* len */
	else
		ib->ptr[ib->length_dw++] = 0x00000030; /* len */
	ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000042;
	ib->ptr[ib->length_dw++] = 0x0000000a;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = 0x00000080;
	ib->ptr[ib->length_dw++] = 0x00000060;
	ib->ptr[ib->length_dw++] = 0x00000100;
	ib->ptr[ib->length_dw++] = 0x00000100;
	ib->ptr[ib->length_dw++] = 0x0000000c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	if ((ring->adev->vce.fw_version >> 24) >= 52) {
		/* extra padding dwords for the longer layout */
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
	}

	ib->ptr[ib->length_dw++] = 0x00000014; /* len */
	ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x00000001;

	/* pad the rest of the IB with zeros */
	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	/* HW test: bypass the scheduler and submit directly on the ring */
	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);
	return 0;

err:
	amdgpu_job_free(job);
	return r;
}
492
493 /**
494  * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg
495  *
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @direct: submit directly on the ring instead of through the entity
 * @fence: optional fence to return
500  *
501  * Close up a stream for HW test or if userspace failed to do so
502  */
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
				      bool direct, struct dma_fence **fence)
{
	/* fixed-size IB; unused tail dwords are zeroed below */
	const unsigned ib_size_dw = 1024;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];

	/* stitch together an VCE destroy msg */
	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
	ib->ptr[ib->length_dw++] = handle;

	ib->ptr[ib->length_dw++] = 0x00000020; /* len */
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if no */
	ib->ptr[ib->length_dw++] = 0x00000001; /* destroy session */
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008; /* len */
	ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */

	/* pad the rest of the IB with zeros */
	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	/* direct submission for HW tests, scheduler entity otherwise */
	if (direct)
		r = amdgpu_job_submit_direct(job, ring, &f);
	else
		r = amdgpu_job_submit(job, &ring->adev->vce.entity,
				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);
	return 0;

err:
	amdgpu_job_free(job);
	return r;
}
556
557 /**
 * amdgpu_vce_validate_bo - make sure not to cross 4GB boundary
 *
 * @p: parser context
 * @ib_idx: indirect buffer to use
 * @lo: address of lower dword
 * @hi: address of higher dword
 * @size: minimum size
 * @index: bs/fb index
565  *
566  * Make sure that no BO cross a 4GB boundary.
567  */
static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
				  int lo, int hi, unsigned size, int32_t index)
{
	/* byte offset of slot 'index' inside the buffer */
	int64_t offset = ((uint64_t)size) * ((int64_t)index);
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_bo_va_mapping *mapping;
	unsigned i, fpfn, lpfn;
	struct amdgpu_bo *bo;
	uint64_t addr;
	int r;

	/* read the 64 bit address out of the two command stream dwords */
	addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
	       ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
	if (index >= 0) {
		/* address points at slot 'index': keep the whole BO below 4GB */
		addr += offset;
		fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT;
		lpfn = 0x100000000ULL >> PAGE_SHIFT;
	} else {
		/* no index: leave room for 'offset' bytes below the 4GB line */
		fpfn = 0;
		lpfn = (0x100000000ULL - PAGE_ALIGN(offset)) >> PAGE_SHIFT;
	}

	r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
	if (r) {
		DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
			  addr, lo, hi, size, index);
		return r;
	}

	/* tighten every placement of the BO to the computed pfn window */
	for (i = 0; i < bo->placement.num_placement; ++i) {
		bo->placements[i].fpfn = max(bo->placements[i].fpfn, fpfn);
		bo->placements[i].lpfn = bo->placements[i].lpfn ?
			min(bo->placements[i].lpfn, lpfn) : lpfn;
	}
	/* re-validate so TTM moves the BO if it violates the new limits */
	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}
604
605
606 /**
607  * amdgpu_vce_cs_reloc - command submission relocation
608  *
 * @p: parser context
 * @ib_idx: indirect buffer to use
 * @lo: address of lower dword
 * @hi: address of higher dword
 * @size: minimum size
 * @index: bs/fb index
613  *
614  * Patch relocation inside command stream with real buffer address
615  */
static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
			       int lo, int hi, unsigned size, uint32_t index)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	uint64_t addr;
	int r;

	/* 0xffffffff means "no index", treat it like slot zero */
	if (index == 0xffffffff)
		index = 0;

	/* read the 64 bit address out of the two command stream dwords */
	addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
	       ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
	/* advance to the slot selected by index */
	addr += ((uint64_t)size) * ((uint64_t)index);

	r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
	if (r) {
		DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
			  addr, lo, hi, size, index);
		return r;
	}

	/* make sure 'size' bytes starting at addr still fit the mapping */
	if ((addr + (uint64_t)size) >
	    (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
		DRM_ERROR("BO too small for addr 0x%010Lx %d %d\n",
			  addr, lo, hi);
		return -EINVAL;
	}

	/* translate the VM address into the BO's real GPU address ... */
	addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
	addr += amdgpu_bo_gpu_offset(bo);
	/* ... and step back to the base of slot zero */
	addr -= ((uint64_t)size) * ((uint64_t)index);

	/* patch the resolved address back into the command stream */
	amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr));
	amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr));

	return 0;
}
654
655 /**
656  * amdgpu_vce_validate_handle - validate stream handle
657  *
658  * @p: parser context
659  * @handle: handle to validate
660  * @allocated: allocated a new handle?
661  *
662  * Validates the handle and return the found session index or -EINVAL
 * when we don't have another free session index.
664  */
665 static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
666                                       uint32_t handle, uint32_t *allocated)
667 {
668         unsigned i;
669
670         /* validate the handle */
671         for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
672                 if (atomic_read(&p->adev->vce.handles[i]) == handle) {
673                         if (p->adev->vce.filp[i] != p->filp) {
674                                 DRM_ERROR("VCE handle collision detected!\n");
675                                 return -EINVAL;
676                         }
677                         return i;
678                 }
679         }
680
681         /* handle not found try to alloc a new one */
682         for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
683                 if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
684                         p->adev->vce.filp[i] = p->filp;
685                         p->adev->vce.img_size[i] = 0;
686                         *allocated |= 1 << i;
687                         return i;
688                 }
689         }
690
691         DRM_ERROR("No more free VCE handles!\n");
692         return -EINVAL;
693 }
694
695 /**
696  * amdgpu_vce_cs_parse - parse and validate the command stream
697  *
698  * @p: parser context
699  *
700  */
701 int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
702 {
703         struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
704         unsigned fb_idx = 0, bs_idx = 0;
705         int session_idx = -1;
706         uint32_t destroyed = 0;
707         uint32_t created = 0;
708         uint32_t allocated = 0;
709         uint32_t tmp, handle = 0;
710         uint32_t *size = &tmp;
711         unsigned idx;
712         int i, r = 0;
713
714         p->job->vm = NULL;
715         ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
716
717         for (idx = 0; idx < ib->length_dw;) {
718                 uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
719                 uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
720
721                 if ((len < 8) || (len & 3)) {
722                         DRM_ERROR("invalid VCE command length (%d)!\n", len);
723                         r = -EINVAL;
724                         goto out;
725                 }
726
727                 switch (cmd) {
728                 case 0x00000002: /* task info */
729                         fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
730                         bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
731                         break;
732
733                 case 0x03000001: /* encode */
734                         r = amdgpu_vce_validate_bo(p, ib_idx, idx + 10,
735                                                    idx + 9, 0, 0);
736                         if (r)
737                                 goto out;
738
739                         r = amdgpu_vce_validate_bo(p, ib_idx, idx + 12,
740                                                    idx + 11, 0, 0);
741                         if (r)
742                                 goto out;
743                         break;
744
745                 case 0x05000001: /* context buffer */
746                         r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
747                                                    idx + 2, 0, 0);
748                         if (r)
749                                 goto out;
750                         break;
751
752                 case 0x05000004: /* video bitstream buffer */
753                         tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
754                         r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
755                                                    tmp, bs_idx);
756                         if (r)
757                                 goto out;
758                         break;
759
760                 case 0x05000005: /* feedback buffer */
761                         r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
762                                                    4096, fb_idx);
763                         if (r)
764                                 goto out;
765                         break;
766
767                 case 0x0500000d: /* MV buffer */
768                         r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
769                                                         idx + 2, 0, 0);
770                         if (r)
771                                 goto out;
772
773                         r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8,
774                                                         idx + 7, 0, 0);
775                         if (r)
776                                 goto out;
777                         break;
778                 }
779
780                 idx += len / 4;
781         }
782
783         for (idx = 0; idx < ib->length_dw;) {
784                 uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
785                 uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
786
787                 switch (cmd) {
788                 case 0x00000001: /* session */
789                         handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
790                         session_idx = amdgpu_vce_validate_handle(p, handle,
791                                                                  &allocated);
792                         if (session_idx < 0) {
793                                 r = session_idx;
794                                 goto out;
795                         }
796                         size = &p->adev->vce.img_size[session_idx];
797                         break;
798
799                 case 0x00000002: /* task info */
800                         fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
801                         bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
802                         break;
803
804                 case 0x01000001: /* create */
805                         created |= 1 << session_idx;
806                         if (destroyed & (1 << session_idx)) {
807                                 destroyed &= ~(1 << session_idx);
808                                 allocated |= 1 << session_idx;
809
810                         } else if (!(allocated & (1 << session_idx))) {
811                                 DRM_ERROR("Handle already in use!\n");
812                                 r = -EINVAL;
813                                 goto out;
814                         }
815
816                         *size = amdgpu_get_ib_value(p, ib_idx, idx + 8) *
817                                 amdgpu_get_ib_value(p, ib_idx, idx + 10) *
818                                 8 * 3 / 2;
819                         break;
820
821                 case 0x04000001: /* config extension */
822                 case 0x04000002: /* pic control */
823                 case 0x04000005: /* rate control */
824                 case 0x04000007: /* motion estimation */
825                 case 0x04000008: /* rdo */
826                 case 0x04000009: /* vui */
827                 case 0x05000002: /* auxiliary buffer */
828                 case 0x05000009: /* clock table */
829                         break;
830
831                 case 0x0500000c: /* hw config */
832                         switch (p->adev->asic_type) {
833 #ifdef CONFIG_DRM_AMDGPU_CIK
834                         case CHIP_KAVERI:
835                         case CHIP_MULLINS:
836 #endif
837                         case CHIP_CARRIZO:
838                                 break;
839                         default:
840                                 r = -EINVAL;
841                                 goto out;
842                         }
843                         break;
844
845                 case 0x03000001: /* encode */
846                         r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
847                                                 *size, 0);
848                         if (r)
849                                 goto out;
850
851                         r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11,
852                                                 *size / 3, 0);
853                         if (r)
854                                 goto out;
855                         break;
856
857                 case 0x02000001: /* destroy */
858                         destroyed |= 1 << session_idx;
859                         break;
860
861                 case 0x05000001: /* context buffer */
862                         r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
863                                                 *size * 2, 0);
864                         if (r)
865                                 goto out;
866                         break;
867
868                 case 0x05000004: /* video bitstream buffer */
869                         tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
870                         r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
871                                                 tmp, bs_idx);
872                         if (r)
873                                 goto out;
874                         break;
875
876                 case 0x05000005: /* feedback buffer */
877                         r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
878                                                 4096, fb_idx);
879                         if (r)
880                                 goto out;
881                         break;
882
883                 case 0x0500000d: /* MV buffer */
884                         r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3,
885                                                         idx + 2, *size, 0);
886                         if (r)
887                                 goto out;
888
889                         r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8,
890                                                         idx + 7, *size / 12, 0);
891                         if (r)
892                                 goto out;
893                         break;
894
895                 default:
896                         DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
897                         r = -EINVAL;
898                         goto out;
899                 }
900
901                 if (session_idx == -1) {
902                         DRM_ERROR("no session command at start of IB\n");
903                         r = -EINVAL;
904                         goto out;
905                 }
906
907                 idx += len / 4;
908         }
909
910         if (allocated & ~created) {
911                 DRM_ERROR("New session without create command!\n");
912                 r = -ENOENT;
913         }
914
915 out:
916         if (!r) {
917                 /* No error, free all destroyed handle slots */
918                 tmp = destroyed;
919         } else {
920                 /* Error during parsing, free all allocated handle slots */
921                 tmp = allocated;
922         }
923
924         for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
925                 if (tmp & (1 << i))
926                         atomic_set(&p->adev->vce.handles[i], 0);
927
928         return r;
929 }
930
931 /**
932  * amdgpu_vce_cs_parse_vm - parse the command stream in VM mode
933  *
934  * @p: parser context
935  *
936  */
937 int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
938 {
939         struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
940         int session_idx = -1;
941         uint32_t destroyed = 0;
942         uint32_t created = 0;
943         uint32_t allocated = 0;
944         uint32_t tmp, handle = 0;
945         int i, r = 0, idx = 0;
946
947         while (idx < ib->length_dw) {
948                 uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
949                 uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
950
951                 if ((len < 8) || (len & 3)) {
952                         DRM_ERROR("invalid VCE command length (%d)!\n", len);
953                         r = -EINVAL;
954                         goto out;
955                 }
956
957                 switch (cmd) {
958                 case 0x00000001: /* session */
959                         handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
960                         session_idx = amdgpu_vce_validate_handle(p, handle,
961                                                                  &allocated);
962                         if (session_idx < 0) {
963                                 r = session_idx;
964                                 goto out;
965                         }
966                         break;
967
968                 case 0x01000001: /* create */
969                         created |= 1 << session_idx;
970                         if (destroyed & (1 << session_idx)) {
971                                 destroyed &= ~(1 << session_idx);
972                                 allocated |= 1 << session_idx;
973
974                         } else if (!(allocated & (1 << session_idx))) {
975                                 DRM_ERROR("Handle already in use!\n");
976                                 r = -EINVAL;
977                                 goto out;
978                         }
979
980                         break;
981
982                 case 0x02000001: /* destroy */
983                         destroyed |= 1 << session_idx;
984                         break;
985
986                 default:
987                         break;
988                 }
989
990                 if (session_idx == -1) {
991                         DRM_ERROR("no session command at start of IB\n");
992                         r = -EINVAL;
993                         goto out;
994                 }
995
996                 idx += len / 4;
997         }
998
999         if (allocated & ~created) {
1000                 DRM_ERROR("New session without create command!\n");
1001                 r = -ENOENT;
1002         }
1003
1004 out:
1005         if (!r) {
1006                 /* No error, free all destroyed handle slots */
1007                 tmp = destroyed;
1008                 amdgpu_ib_free(p->adev, ib, NULL);
1009         } else {
1010                 /* Error during parsing, free all allocated handle slots */
1011                 tmp = allocated;
1012         }
1013
1014         for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
1015                 if (tmp & (1 << i))
1016                         atomic_set(&p->adev->vce.handles[i], 0);
1017
1018         return r;
1019 }
1020
/**
 * amdgpu_vce_ring_emit_ib - execute indirect buffer
 *
 * @ring: engine to use
 * @job: job the IB belongs to (unused here; part of the common
 *       emit_ib callback signature)
 * @ib: the IB to execute
 * @flags: unused by VCE
 *
 * Emits a VCE_CMD_IB packet carrying the IB's GPU address and size so
 * the engine fetches and executes it.
 */
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
				struct amdgpu_job *job,
				struct amdgpu_ib *ib,
				uint32_t flags)
{
	amdgpu_ring_write(ring, VCE_CMD_IB);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}
1038
/**
 * amdgpu_vce_ring_emit_fence - add a fence command to the ring
 *
 * @ring: engine to use
 * @addr: GPU address the fence value is written to
 * @seq: sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_* bits; 64-bit fences are not supported
 *
 * Emits a VCE_CMD_FENCE packet with @addr and @seq, followed by a trap
 * and an end-of-stream marker.
 */
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				unsigned flags)
{
	/* VCE fences carry only a 32-bit sequence value */
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
	amdgpu_ring_write(ring, VCE_CMD_END);
}
1058
1059 /**
1060  * amdgpu_vce_ring_test_ring - test if VCE ring is working
1061  *
1062  * @ring: the engine to test on
1063  *
1064  */
1065 int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
1066 {
1067         struct amdgpu_device *adev = ring->adev;
1068         uint32_t rptr;
1069         unsigned i;
1070         int r, timeout = adev->usec_timeout;
1071
1072         /* skip ring test for sriov*/
1073         if (amdgpu_sriov_vf(adev))
1074                 return 0;
1075
1076         r = amdgpu_ring_alloc(ring, 16);
1077         if (r)
1078                 return r;
1079
1080         rptr = amdgpu_ring_get_rptr(ring);
1081
1082         amdgpu_ring_write(ring, VCE_CMD_END);
1083         amdgpu_ring_commit(ring);
1084
1085         for (i = 0; i < timeout; i++) {
1086                 if (amdgpu_ring_get_rptr(ring) != rptr)
1087                         break;
1088                 udelay(1);
1089         }
1090
1091         if (i >= timeout)
1092                 r = -ETIMEDOUT;
1093
1094         return r;
1095 }
1096
1097 /**
1098  * amdgpu_vce_ring_test_ib - test if VCE IBs are working
1099  *
1100  * @ring: the engine to test on
1101  *
1102  */
1103 int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1104 {
1105         struct dma_fence *fence = NULL;
1106         struct amdgpu_bo *bo = NULL;
1107         long r;
1108
1109         /* skip vce ring1/2 ib test for now, since it's not reliable */
1110         if (ring != &ring->adev->vce.ring[0])
1111                 return 0;
1112
1113         r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE,
1114                                       AMDGPU_GEM_DOMAIN_VRAM,
1115                                       &bo, NULL, NULL);
1116         if (r)
1117                 return r;
1118
1119         r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL);
1120         if (r)
1121                 goto error;
1122
1123         r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
1124         if (r)
1125                 goto error;
1126
1127         r = dma_fence_wait_timeout(fence, false, timeout);
1128         if (r == 0)
1129                 r = -ETIMEDOUT;
1130         else if (r > 0)
1131                 r = 0;
1132
1133 error:
1134         dma_fence_put(fence);
1135         amdgpu_bo_unreserve(bo);
1136         amdgpu_bo_unref(&bo);
1137         return r;
1138 }