bcm27xx: update patches from RPi foundation
[oweals/openwrt.git] / target / linux / bcm27xx / patches-5.4 / 950-0510-staging-media-Add-Raspberry-Pi-V4L2-H265-decoder.patch
1 From 82bbd353e2dc364bf37e6f0b91890cb432b1a72f Mon Sep 17 00:00:00 2001
2 From: John Cox <jc@kynesim.co.uk>
3 Date: Thu, 5 Mar 2020 18:30:41 +0000
4 Subject: [PATCH] staging: media: Add Raspberry Pi V4L2 H265 decoder
5
6 This driver is for the HEVC/H265 decoder block on the Raspberry
7 Pi 4, and conforms to the V4L2 stateless decoder API.
8
9 Signed-off-by: John Cox <jc@kynesim.co.uk>
10 ---
11  drivers/staging/media/Kconfig               |    2 +
12  drivers/staging/media/Makefile              |    1 +
13  drivers/staging/media/rpivid/Kconfig        |   16 +
14  drivers/staging/media/rpivid/Makefile       |    5 +
15  drivers/staging/media/rpivid/rpivid.c       |  432 ++++
16  drivers/staging/media/rpivid/rpivid.h       |  181 ++
17  drivers/staging/media/rpivid/rpivid_dec.c   |   79 +
18  drivers/staging/media/rpivid/rpivid_dec.h   |   19 +
19  drivers/staging/media/rpivid/rpivid_h265.c  | 2275 +++++++++++++++++++
20  drivers/staging/media/rpivid/rpivid_hw.c    |  321 +++
21  drivers/staging/media/rpivid/rpivid_hw.h    |  300 +++
22  drivers/staging/media/rpivid/rpivid_video.c |  593 +++++
23  drivers/staging/media/rpivid/rpivid_video.h |   30 +
24  14 files changed, 4256 insertions(+)
25  create mode 100644 drivers/staging/media/rpivid/Kconfig
26  create mode 100644 drivers/staging/media/rpivid/Makefile
27  create mode 100644 drivers/staging/media/rpivid/rpivid.c
28  create mode 100644 drivers/staging/media/rpivid/rpivid.h
29  create mode 100644 drivers/staging/media/rpivid/rpivid_dec.c
30  create mode 100644 drivers/staging/media/rpivid/rpivid_dec.h
31  create mode 100644 drivers/staging/media/rpivid/rpivid_h265.c
32  create mode 100644 drivers/staging/media/rpivid/rpivid_hw.c
33  create mode 100644 drivers/staging/media/rpivid/rpivid_hw.h
34  create mode 100644 drivers/staging/media/rpivid/rpivid_video.c
35  create mode 100644 drivers/staging/media/rpivid/rpivid_video.h
36
37 --- a/drivers/staging/media/Kconfig
38 +++ b/drivers/staging/media/Kconfig
39 @@ -30,6 +30,8 @@ source "drivers/staging/media/meson/vdec
40  
41  source "drivers/staging/media/omap4iss/Kconfig"
42  
43 +source "drivers/staging/media/rpivid/Kconfig"
44 +
45  source "drivers/staging/media/sunxi/Kconfig"
46  
47  source "drivers/staging/media/tegra-vde/Kconfig"
48 --- a/drivers/staging/media/Makefile
49 +++ b/drivers/staging/media/Makefile
50 @@ -3,6 +3,7 @@ obj-$(CONFIG_VIDEO_ALLEGRO_DVT) += alleg
51  obj-$(CONFIG_VIDEO_IMX_MEDIA)  += imx/
52  obj-$(CONFIG_VIDEO_MESON_VDEC) += meson/vdec/
53  obj-$(CONFIG_VIDEO_OMAP4)      += omap4iss/
54 +obj-$(CONFIG_VIDEO_RPIVID)     += rpivid/
55  obj-$(CONFIG_VIDEO_SUNXI)      += sunxi/
56  obj-$(CONFIG_TEGRA_VDE)                += tegra-vde/
57  obj-$(CONFIG_VIDEO_HANTRO)     += hantro/
58 --- /dev/null
59 +++ b/drivers/staging/media/rpivid/Kconfig
60 @@ -0,0 +1,16 @@
61 +# SPDX-License-Identifier: GPL-2.0
62 +
63 +config VIDEO_RPIVID
64 +       tristate "Rpi H265 driver"
65 +       depends on VIDEO_DEV && VIDEO_V4L2
66 +       depends on MEDIA_CONTROLLER
67 +       depends on OF
68 +       depends on MEDIA_CONTROLLER_REQUEST_API
69 +       select VIDEOBUF2_DMA_CONTIG
70 +       select V4L2_MEM2MEM_DEV
71 +       help
72 +         Support for the Rpi H265 h/w decoder.
73 +
74 +         To compile this driver as a module, choose M here: the module
75 +         will be called rpivid-hevc.
76 +
77 --- /dev/null
78 +++ b/drivers/staging/media/rpivid/Makefile
79 @@ -0,0 +1,5 @@
80 +# SPDX-License-Identifier: GPL-2.0
81 +obj-$(CONFIG_VIDEO_RPIVID) += rpivid-hevc.o
82 +
83 +rpivid-hevc-y = rpivid.o rpivid_video.o rpivid_dec.o \
84 +                rpivid_hw.o rpivid_h265.o
85 --- /dev/null
86 +++ b/drivers/staging/media/rpivid/rpivid.c
87 @@ -0,0 +1,432 @@
88 +// SPDX-License-Identifier: GPL-2.0
89 +/*
90 + * Raspberry Pi HEVC driver
91 + *
92 + * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
93 + *
94 + * Based on the Cedrus VPU driver, that is:
95 + *
96 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
97 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
98 + * Copyright (C) 2018 Bootlin
99 + */
100 +
101 +#include <linux/platform_device.h>
102 +#include <linux/module.h>
103 +#include <linux/of.h>
104 +
105 +#include <media/v4l2-device.h>
106 +#include <media/v4l2-ioctl.h>
107 +#include <media/v4l2-ctrls.h>
108 +#include <media/v4l2-mem2mem.h>
109 +
110 +#include "rpivid.h"
111 +#include "rpivid_video.h"
112 +#include "rpivid_hw.h"
113 +#include "rpivid_dec.h"
114 +
115 +/*
116 + * Default /dev/videoN node number.
117 + * Deliberately avoid the very low numbers as these are often taken by webcams
118 + * etc, and simple apps tend to only go for /dev/video0.
    + * The value is handed to video_register_device() in rpivid_probe() and can
    + * be overridden through the video_nr module parameter.
119 + */
120 +static int video_nr = 19;
121 +module_param(video_nr, int, 0644);
122 +MODULE_PARM_DESC(video_nr, "decoder video device number");
123 +
    +/*
    + * Controls created on every context's handler by rpivid_init_ctrls().
    + * Entries flagged .required must be present in each request; this is
    + * enforced by rpivid_request_validate().
    + */
124 +static const struct rpivid_control rpivid_ctrls[] = {
125 +       {
126 +               .cfg = {
127 +                       .id     = V4L2_CID_MPEG_VIDEO_HEVC_SPS,
128 +               },
129 +               .required       = true,
130 +       },
131 +       {
132 +               .cfg = {
133 +                       .id     = V4L2_CID_MPEG_VIDEO_HEVC_PPS,
134 +               },
135 +               .required       = true,
136 +       },
137 +       {
138 +               .cfg = {
139 +                       .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX,
140 +               },
141 +               .required       = false,
142 +       },
143 +       {
144 +               .cfg = {
145 +                       .id     = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS,
146 +               },
147 +               .required       = true,
148 +       },
149 +       {
    +               /* Only values up to SLICE_BASED are accepted (.max) */
150 +               .cfg = {
151 +                       .id     = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE,
152 +                       .max    = V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED,
153 +                       .def    = V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED,
154 +               },
155 +               .required       = false,
156 +       },
157 +       {
    +               /* Only values up to START_CODE_NONE are accepted (.max) */
158 +               .cfg = {
159 +                       .id     = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE,
160 +                       .max    = V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE,
161 +                       .def    = V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE,
162 +               },
163 +               .required       = false,
164 +       },
165 +};
166 +
    +/* Number of entries in rpivid_ctrls[] */
167 +#define rpivid_ctrls_COUNT     ARRAY_SIZE(rpivid_ctrls)
168 +
169 +/*
170 + * rpivid_find_control_data() - look up the current payload of a control
171 + * @ctx: decoder context whose ctx->ctrls array is searched
172 + * @id:  V4L2 control id to look for
173 + *
174 + * Walks ctx->ctrls until the id matches or a NULL entry is reached. The walk
175 + * is also capped at rpivid_ctrls_COUNT entries and tolerates a NULL
176 + * ctx->ctrls, so it never reads past the slots filled in by
177 + * rpivid_init_ctrls() even if no terminator slot is available.
178 + *
179 + * Return: the control's p_cur.p pointer, or NULL if the id is not present.
180 + */
181 +void *rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id)
182 +{
183 +       unsigned int i;
184 +
185 +       if (!ctx->ctrls)
186 +               return NULL;
187 +
188 +       for (i = 0; i < rpivid_ctrls_COUNT && ctx->ctrls[i]; i++) {
189 +               if (ctx->ctrls[i]->id == id)
190 +                       return ctx->ctrls[i]->p_cur.p;
191 +       }
192 +
193 +       return NULL;
194 +}
179 +
180 +/*
181 + * rpivid_init_ctrls() - create the per-context control handler
182 + * @dev: device, used for error logging
183 + * @ctx: context whose hdl and ctrls members are initialised
184 + *
185 + * Creates one control per rpivid_ctrls[] entry and records the resulting
186 + * pointers in ctx->ctrls. The array has one extra zeroed slot so that
187 + * rpivid_find_control_data() can stop on a NULL entry.
188 + *
189 + * On failure everything set up here is released again and ctx->ctrls is
190 + * left NULL, so the caller only has to free the context itself.
191 + *
192 + * Return: 0 on success or a negative errno.
193 + */
194 +static int rpivid_init_ctrls(struct rpivid_dev *dev, struct rpivid_ctx *ctx)
195 +{
196 +       struct v4l2_ctrl_handler *hdl = &ctx->hdl;
197 +       struct v4l2_ctrl *ctrl;
198 +       unsigned int i;
199 +       int ret;
200 +
201 +       v4l2_ctrl_handler_init(hdl, rpivid_ctrls_COUNT);
202 +       if (hdl->error) {
203 +               ret = hdl->error;
204 +               v4l2_err(&dev->v4l2_dev,
205 +                        "Failed to initialize control handler\n");
206 +               goto err_free_hdl;
207 +       }
208 +
209 +       /* COUNT pointers plus a full NULL terminator slot, zero-filled */
210 +       ctx->ctrls = kcalloc(rpivid_ctrls_COUNT + 1, sizeof(*ctx->ctrls),
211 +                            GFP_KERNEL);
212 +       if (!ctx->ctrls) {
213 +               ret = -ENOMEM;
214 +               goto err_free_hdl;
215 +       }
216 +
217 +       for (i = 0; i < rpivid_ctrls_COUNT; i++) {
218 +               ctrl = v4l2_ctrl_new_custom(hdl, &rpivid_ctrls[i].cfg,
219 +                                           NULL);
220 +               if (hdl->error) {
221 +                       /*
222 +                        * Latch the error before the handler is freed:
223 +                        * freeing resets the handler state, so reading
224 +                        * hdl->error afterwards would report success.
225 +                        */
226 +                       ret = hdl->error;
227 +                       v4l2_err(&dev->v4l2_dev,
228 +                                "Failed to create new custom control id=%#x\n",
229 +                                rpivid_ctrls[i].cfg.id);
230 +                       goto err_free_ctrls;
231 +               }
232 +
233 +               ctx->ctrls[i] = ctrl;
234 +       }
235 +
236 +       ctx->fh.ctrl_handler = hdl;
237 +       v4l2_ctrl_handler_setup(hdl);
238 +
239 +       return 0;
240 +
241 +err_free_ctrls:
242 +       kfree(ctx->ctrls);
243 +       ctx->ctrls = NULL;
244 +err_free_hdl:
245 +       v4l2_ctrl_handler_free(hdl);
246 +
247 +       return ret;
248 +}
221 +
222 +/*
223 + * rpivid_request_validate() - media request validation hook
224 + * @req: request about to be queued
225 + *
226 + * Accepts a request only if it carries exactly one buffer and a control
227 + * handler containing every rpivid_ctrls[] entry flagged as required, then
228 + * defers to vb2_request_validate().
229 + *
230 + * The reference taken by v4l2_ctrl_request_hdl_find() is dropped on every
231 + * path, including the one where a required control is missing.
232 + *
233 + * Return: 0 on success, -ENOENT if the buffer or a control is missing,
234 + * -EINVAL if more than one buffer is attached, or the result of
235 + * vb2_request_validate().
236 + */
237 +static int rpivid_request_validate(struct media_request *req)
238 +{
239 +       struct media_request_object *obj;
240 +       struct v4l2_ctrl_handler *hdl;
241 +       struct rpivid_ctx *ctx = NULL;
242 +       unsigned int count;
243 +       unsigned int i;
244 +       int ret = 0;
245 +
246 +       /* The first buffer object in the request identifies the context */
247 +       list_for_each_entry(obj, &req->objects, list) {
248 +               struct vb2_buffer *vb;
249 +
250 +               if (vb2_request_object_is_buffer(obj)) {
251 +                       vb = container_of(obj, struct vb2_buffer, req_obj);
252 +                       ctx = vb2_get_drv_priv(vb->vb2_queue);
253 +
254 +                       break;
255 +               }
256 +       }
257 +
258 +       if (!ctx)
259 +               return -ENOENT;
260 +
261 +       count = vb2_request_buffer_cnt(req);
262 +       if (!count) {
263 +               v4l2_info(&ctx->dev->v4l2_dev,
264 +                         "No buffer was provided with the request\n");
265 +               return -ENOENT;
266 +       } else if (count > 1) {
267 +               v4l2_info(&ctx->dev->v4l2_dev,
268 +                         "More than one buffer was provided with the request\n");
269 +               return -EINVAL;
270 +       }
271 +
272 +       hdl = v4l2_ctrl_request_hdl_find(req, &ctx->hdl);
273 +       if (!hdl) {
274 +               v4l2_info(&ctx->dev->v4l2_dev, "Missing codec control(s)\n");
275 +               return -ENOENT;
276 +       }
277 +
278 +       for (i = 0; i < rpivid_ctrls_COUNT; i++) {
279 +               if (!rpivid_ctrls[i].required)
280 +                       continue;
281 +
282 +               if (!v4l2_ctrl_request_hdl_ctrl_find(hdl,
283 +                                                    rpivid_ctrls[i].cfg.id)) {
284 +                       v4l2_info(&ctx->dev->v4l2_dev,
285 +                                 "Missing required codec control\n");
286 +                       ret = -ENOENT;
287 +                       break;
288 +               }
289 +       }
290 +
291 +       /* Balance v4l2_ctrl_request_hdl_find() whether or not the scan passed */
292 +       v4l2_ctrl_request_hdl_put(hdl);
293 +
294 +       if (ret)
295 +               return ret;
296 +
297 +       return vb2_request_validate(req);
298 +}
282 +
283 +/*
284 + * rpivid_open() - file open handler for the decoder video node
285 + * @file: file being opened
286 + *
287 + * Allocates a context, initialises its V4L2 file handle, controls and
288 + * mem2mem context, and seeds the source format with the default pixel
289 + * format. All of this runs under dev->dev_mutex.
290 + *
291 + * Each error path releases exactly what was set up before it, including the
292 + * ctx->ctrls array and the initialised file handle.
293 + *
294 + * Return: 0 on success, -ERESTARTSYS if interrupted while waiting for the
295 + * mutex, -ENOMEM, or the error from control / mem2mem initialisation.
296 + */
297 +static int rpivid_open(struct file *file)
298 +{
299 +       struct rpivid_dev *dev = video_drvdata(file);
300 +       struct rpivid_ctx *ctx = NULL;
301 +       int ret;
302 +
303 +       if (mutex_lock_interruptible(&dev->dev_mutex))
304 +               return -ERESTARTSYS;
305 +
306 +       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
307 +       if (!ctx) {
308 +               mutex_unlock(&dev->dev_mutex);
309 +               return -ENOMEM;
310 +       }
311 +
312 +       v4l2_fh_init(&ctx->fh, video_devdata(file));
313 +       file->private_data = &ctx->fh;
314 +       ctx->dev = dev;
315 +
316 +       ret = rpivid_init_ctrls(dev, ctx);
317 +       if (ret)
318 +               goto err_free;
319 +
320 +       ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, ctx,
321 +                                           &rpivid_queue_init);
322 +       if (IS_ERR(ctx->fh.m2m_ctx)) {
323 +               ret = PTR_ERR(ctx->fh.m2m_ctx);
324 +               goto err_ctrls;
325 +       }
326 +
327 +       /* The only bit of format info that we can guess now is H265 src
328 +        * Everything else we need more info for
329 +        */
330 +       ctx->src_fmt.pixelformat = RPIVID_SRC_PIXELFORMAT_DEFAULT;
331 +       rpivid_prepare_src_format(&ctx->src_fmt);
332 +
333 +       v4l2_fh_add(&ctx->fh);
334 +
335 +       mutex_unlock(&dev->dev_mutex);
336 +
337 +       return 0;
338 +
339 +err_ctrls:
340 +       /* Only reached once rpivid_init_ctrls() has succeeded */
341 +       v4l2_ctrl_handler_free(&ctx->hdl);
342 +       kfree(ctx->ctrls);
343 +err_free:
344 +       /* Counterpart of v4l2_fh_init(); the handle was never added */
345 +       v4l2_fh_exit(&ctx->fh);
346 +       kfree(ctx);
347 +       mutex_unlock(&dev->dev_mutex);
348 +
349 +       return ret;
350 +}
333 +
334 +/*
335 + * rpivid_release() - file release handler for the decoder video node
336 + * @file: file being closed; its private_data is the context's v4l2_fh
337 + *
338 + * Undoes rpivid_open() under dev->dev_mutex: the file handle is removed,
339 + * the mem2mem context released, the controls freed and finally the context
340 + * itself.
341 + *
342 + * Return: always 0.
343 + */
344 +static int rpivid_release(struct file *file)
345 +{
346 +       struct v4l2_fh *fh = file->private_data;
347 +       struct rpivid_ctx *ctx = container_of(fh, struct rpivid_ctx, fh);
348 +       struct rpivid_dev *dev = video_drvdata(file);
349 +       struct mutex *lock = &dev->dev_mutex;
350 +
351 +       mutex_lock(lock);
352 +
353 +       /* Detach from the device before tearing down the queues */
354 +       v4l2_fh_del(&ctx->fh);
355 +       v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
356 +
357 +       /* Controls: handler first, then the pointer array built on it */
358 +       v4l2_ctrl_handler_free(&ctx->hdl);
359 +       kfree(ctx->ctrls);
360 +
361 +       v4l2_fh_exit(&ctx->fh);
362 +       kfree(ctx);
363 +
364 +       mutex_unlock(lock);
365 +
366 +       return 0;
367 +}
356 +
    +/*
    + * File operations of the decoder video node: open/release are driver
    + * specific, poll and mmap use the mem2mem helpers and ioctls are routed
    + * through video_ioctl2.
    + */
357 +static const struct v4l2_file_operations rpivid_fops = {
358 +       .owner          = THIS_MODULE,
359 +       .open           = rpivid_open,
360 +       .release        = rpivid_release,
361 +       .poll           = v4l2_m2m_fop_poll,
362 +       .unlocked_ioctl = video_ioctl2,
363 +       .mmap           = v4l2_m2m_fop_mmap,
364 +};
365 +
    +/*
    + * Template that rpivid_probe() copies into dev->vfd before filling in the
    + * lock and v4l2_dev pointers. The release callback is the empty one;
    + * presumably because the video_device is embedded in struct rpivid_dev and
    + * must not be freed on its own - confirm.
    + */
366 +static const struct video_device rpivid_video_device = {
367 +       .name           = RPIVID_NAME,
368 +       .vfl_dir        = VFL_DIR_M2M,
369 +       .fops           = &rpivid_fops,
370 +       .ioctl_ops      = &rpivid_ioctl_ops,
371 +       .minor          = -1,
372 +       .release        = video_device_release_empty,
373 +       .device_caps    = V4L2_CAP_VIDEO_M2M | V4L2_CAP_STREAMING,
374 +};
375 +
    +/* mem2mem callbacks: only device_run is provided, see rpivid_device_run() */
376 +static const struct v4l2_m2m_ops rpivid_m2m_ops = {
377 +       .device_run     = rpivid_device_run,
378 +};
379 +
    +/*
    + * Media device request hooks: requests are checked by
    + * rpivid_request_validate() and queued with the generic mem2mem helper.
    + */
380 +static const struct media_device_ops rpivid_m2m_media_ops = {
381 +       .req_validate   = rpivid_request_validate,
382 +       .req_queue      = v4l2_m2m_request_queue,
383 +};
384 +
385 +/*
386 + * rpivid_probe() - bind the driver to a platform device
387 + * @pdev: platform device matched through rpivid_dt_match
388 + *
389 + * Probes the hardware, then registers the V4L2 device, the mem2mem device,
390 + * the media device, the video node and the mem2mem media controller
391 + * entities, in that order. A failure at any step unwinds every earlier
392 + * step, including the hardware setup and the initialised media device.
393 + *
394 + * NOTE(review): rpivid_hw_remove() is defined outside this file; it is used
395 + * here as the counterpart of rpivid_hw_probe(), exactly as rpivid_remove()
396 + * does - confirm it is safe to call directly after a successful probe.
397 + *
398 + * Return: 0 on success or a negative errno.
399 + */
400 +static int rpivid_probe(struct platform_device *pdev)
401 +{
402 +       struct rpivid_dev *dev;
403 +       struct video_device *vfd;
404 +       int ret;
405 +
406 +       dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
407 +       if (!dev)
408 +               return -ENOMEM;
409 +
410 +       dev->vfd = rpivid_video_device;
411 +       dev->dev = &pdev->dev;
412 +       dev->pdev = pdev;
413 +
414 +       ret = rpivid_hw_probe(dev);
415 +       if (ret) {
416 +               dev_err(&pdev->dev, "Failed to probe hardware\n");
417 +               return ret;
418 +       }
419 +
420 +       dev->dec_ops = &rpivid_dec_ops_h265;
421 +
422 +       mutex_init(&dev->dev_mutex);
423 +
424 +       ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
425 +       if (ret) {
426 +               dev_err(&pdev->dev, "Failed to register V4L2 device\n");
427 +               goto err_hw;
428 +       }
429 +
430 +       vfd = &dev->vfd;
431 +       vfd->lock = &dev->dev_mutex;
432 +       vfd->v4l2_dev = &dev->v4l2_dev;
433 +
434 +       snprintf(vfd->name, sizeof(vfd->name), "%s", rpivid_video_device.name);
435 +       video_set_drvdata(vfd, dev);
436 +
437 +       dev->m2m_dev = v4l2_m2m_init(&rpivid_m2m_ops);
438 +       if (IS_ERR(dev->m2m_dev)) {
439 +               v4l2_err(&dev->v4l2_dev,
440 +                        "Failed to initialize V4L2 M2M device\n");
441 +               ret = PTR_ERR(dev->m2m_dev);
442 +
443 +               goto err_v4l2;
444 +       }
445 +
446 +       dev->mdev.dev = &pdev->dev;
447 +       strscpy(dev->mdev.model, RPIVID_NAME, sizeof(dev->mdev.model));
448 +       strscpy(dev->mdev.bus_info, "platform:" RPIVID_NAME,
449 +               sizeof(dev->mdev.bus_info));
450 +
451 +       media_device_init(&dev->mdev);
452 +       dev->mdev.ops = &rpivid_m2m_media_ops;
453 +       dev->v4l2_dev.mdev = &dev->mdev;
454 +
455 +       ret = video_register_device(vfd, VFL_TYPE_GRABBER, video_nr);
456 +       if (ret) {
457 +               v4l2_err(&dev->v4l2_dev, "Failed to register video device\n");
458 +               goto err_mdev;
459 +       }
460 +
461 +       v4l2_info(&dev->v4l2_dev,
462 +                 "Device registered as /dev/video%d\n", vfd->num);
463 +
464 +       ret = v4l2_m2m_register_media_controller(dev->m2m_dev, vfd,
465 +                                                MEDIA_ENT_F_PROC_VIDEO_DECODER);
466 +       if (ret) {
467 +               v4l2_err(&dev->v4l2_dev,
468 +                        "Failed to initialize V4L2 M2M media controller\n");
469 +               goto err_video;
470 +       }
471 +
472 +       ret = media_device_register(&dev->mdev);
473 +       if (ret) {
474 +               v4l2_err(&dev->v4l2_dev, "Failed to register media device\n");
475 +               goto err_m2m_mc;
476 +       }
477 +
478 +       platform_set_drvdata(pdev, dev);
479 +
480 +       return 0;
481 +
482 +err_m2m_mc:
483 +       v4l2_m2m_unregister_media_controller(dev->m2m_dev);
484 +err_video:
485 +       video_unregister_device(&dev->vfd);
486 +err_mdev:
487 +       /* Pairs with media_device_init(); the device was never registered */
488 +       media_device_cleanup(&dev->mdev);
489 +       v4l2_m2m_release(dev->m2m_dev);
490 +err_v4l2:
491 +       v4l2_device_unregister(&dev->v4l2_dev);
492 +err_hw:
493 +       rpivid_hw_remove(dev);
494 +
495 +       return ret;
496 +}
479 +
480 +/*
481 + * rpivid_remove() - unbind the driver from a platform device
482 + * @pdev: platform device whose drvdata is the struct rpivid_dev
483 + *
484 + * Tears down the media device (only if its devnode is registered), then
485 + * the mem2mem device, the video node, the V4L2 device and the hardware.
486 + *
487 + * Return: always 0.
488 + */
489 +static int rpivid_remove(struct platform_device *pdev)
490 +{
491 +       struct rpivid_dev *dev = platform_get_drvdata(pdev);
492 +       struct media_device *mdev = &dev->mdev;
493 +       const bool mdev_live = media_devnode_is_registered(mdev->devnode);
494 +
495 +       if (mdev_live) {
496 +               media_device_unregister(mdev);
497 +               v4l2_m2m_unregister_media_controller(dev->m2m_dev);
498 +               media_device_cleanup(mdev);
499 +       }
500 +
501 +       v4l2_m2m_release(dev->m2m_dev);
502 +       video_unregister_device(&dev->vfd);
503 +       v4l2_device_unregister(&dev->v4l2_dev);
504 +
505 +       rpivid_hw_remove(dev);
506 +
507 +       return 0;
508 +}
498 +
    +/* Device-tree compatible strings this driver binds to */
499 +static const struct of_device_id rpivid_dt_match[] = {
500 +       {
501 +               .compatible = "raspberrypi,rpivid-vid-decoder",
502 +       },
503 +       { /* sentinel */ }
504 +};
505 +MODULE_DEVICE_TABLE(of, rpivid_dt_match);
506 +
    +/*
    + * Platform driver glue: probe/remove are rpivid_probe()/rpivid_remove() and
    + * devices are matched through rpivid_dt_match. module_platform_driver()
    + * registers the driver in place of explicit module init/exit functions.
    + */
507 +static struct platform_driver rpivid_driver = {
508 +       .probe          = rpivid_probe,
509 +       .remove         = rpivid_remove,
510 +       .driver         = {
511 +               .name = RPIVID_NAME,
512 +               .of_match_table = of_match_ptr(rpivid_dt_match),
513 +       },
514 +};
515 +module_platform_driver(rpivid_driver);
516 +
517 +MODULE_LICENSE("GPL v2");
518 +MODULE_AUTHOR("John Cox <jc@kynesim.co.uk>");
519 +MODULE_DESCRIPTION("Raspberry Pi HEVC V4L2 driver");
520 --- /dev/null
521 +++ b/drivers/staging/media/rpivid/rpivid.h
522 @@ -0,0 +1,181 @@
523 +/* SPDX-License-Identifier: GPL-2.0 */
524 +/*
525 + * Raspberry Pi HEVC driver
526 + *
527 + * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
528 + *
529 + * Based on the Cedrus VPU driver, that is:
530 + *
531 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
532 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
533 + * Copyright (C) 2018 Bootlin
534 + */
535 +
536 +#ifndef _RPIVID_H_
537 +#define _RPIVID_H_
538 +
539 +#include <linux/clk.h>
540 +#include <linux/platform_device.h>
541 +#include <media/v4l2-ctrls.h>
542 +#include <media/v4l2-device.h>
543 +#include <media/v4l2-mem2mem.h>
544 +#include <media/videobuf2-v4l2.h>
545 +#include <media/videobuf2-dma-contig.h>
546 +
    +/* NOTE(review): debug switch; its users are outside this header - confirm */
547 +#define OPT_DEBUG_POLL_IRQ  0
548 +
    +/* Name used for the video device, the media device and the driver */
549 +#define RPIVID_NAME                    "rpivid"
550 +
    +/* Bit flags; presumably for rpivid_variant.capabilities - confirm */
551 +#define RPIVID_CAPABILITY_UNTILED      BIT(0)
552 +#define RPIVID_CAPABILITY_H265_DEC     BIT(1)
553 +
    +/* Bit flag; presumably for rpivid_variant.quirks - confirm */
554 +#define RPIVID_QUIRK_NO_DMA_OFFSET     BIT(0)
555 +
    +/* Source pixel format a freshly opened context starts with */
556 +#define RPIVID_SRC_PIXELFORMAT_DEFAULT V4L2_PIX_FMT_HEVC_SLICE
557 +
    +/*
    + * Interrupt outcome codes.
    + * NOTE(review): no user is visible in this header; presumably the result
    + * of checking the hardware interrupt state - confirm in rpivid_hw.c.
    + */
558 +enum rpivid_irq_status {
559 +       RPIVID_IRQ_NONE,
560 +       RPIVID_IRQ_ERROR,
561 +       RPIVID_IRQ_OK,
562 +};
563 +
    +/*
    + * One entry of the driver's control table.
    + * @cfg:      configuration passed to v4l2_ctrl_new_custom()
    + * @required: when set, request validation fails unless the control is part
    + *            of the request
    + */
564 +struct rpivid_control {
565 +       struct v4l2_ctrl_config cfg;
566 +       unsigned char           required:1;
567 +};
568 +
    +/*
    + * HEVC control payloads for one decode run. Each pointer is the result of
    + * rpivid_find_control_data() for the matching control id and is NULL when
    + * that control is not found.
    + */
569 +struct rpivid_h265_run {
570 +       const struct v4l2_ctrl_hevc_sps                 *sps;
571 +       const struct v4l2_ctrl_hevc_pps                 *pps;
572 +       const struct v4l2_ctrl_hevc_slice_params        *slice_params;
573 +       const struct v4l2_ctrl_hevc_scaling_matrix      *scaling_matrix;
574 +};
575 +
    +/*
    + * Everything handed to dec_ops->setup() for one device_run invocation.
    + * @src:  next source (bitstream) buffer of the mem2mem context
    + * @dst:  next destination buffer of the mem2mem context
    + * @h265: HEVC control payloads for this run
    + */
576 +struct rpivid_run {
577 +       struct vb2_v4l2_buffer  *src;
578 +       struct vb2_v4l2_buffer  *dst;
579 +
580 +       struct rpivid_h265_run  h265;
581 +};
582 +
    +/*
    + * Driver buffer type wrapping the mem2mem buffer.
    + * NOTE(review): presumably the per-buffer struct used when the vb2 queues
    + * are initialised - confirm in rpivid_video.c.
    + */
583 +struct rpivid_buffer {
584 +       struct v4l2_m2m_buffer          m2m_buf;
585 +};
586 +
587 +struct rpivid_dec_state;
588 +struct rpivid_dec_env;
589 +#define RPIVID_DEC_ENV_COUNT 3
590 +
    +/*
    + * Descriptor of one buffer obtained from dma_alloc_attrs().
    + * @size:  size in bytes that was requested
    + * @ptr:   CPU address returned by the allocator, NULL when unallocated
    + * @addr:  DMA address filled in by the allocator
    + * @attrs: DMA attributes used for the allocation and reused on free
    + */
591 +struct rpivid_gptr {
592 +       size_t size;
593 +       __u8 *ptr;
594 +       dma_addr_t addr;
595 +       unsigned long attrs;
596 +};
597 +
598 +struct rpivid_dev;
599 +typedef void (*rpivid_irq_callback)(struct rpivid_dev *dev, void *ctx);
600 +
601 +struct rpivid_q_aux;
602 +#define RPIVID_AUX_ENT_COUNT VB2_MAX_FRAME
603 +
604 +#define RPIVID_P2BUF_COUNT 2
605 +
    +/*
    + * Per-open decoder context, allocated in rpivid_open() and freed in
    + * rpivid_release(). The v4l2_fh is embedded so that the context can be
    + * recovered from file->private_data with container_of().
    + */
606 +struct rpivid_ctx {
607 +       struct v4l2_fh                  fh;
608 +       struct rpivid_dev               *dev;
609 +
610 +       struct v4l2_pix_format          src_fmt;
611 +       struct v4l2_pix_format          dst_fmt;
612 +       int dst_fmt_set;
613 +
    +       /* Control handler and array of the controls created on it */
614 +       struct v4l2_ctrl_handler        hdl;
615 +       struct v4l2_ctrl                **ctrls;
616 +
617 +       /* Decode state - kept here even though the API is called stateless */
618 +       /* state contains stuff that is only needed in phase0
619 +        * it could be held in dec_env but that would be wasteful
620 +        */
621 +       struct rpivid_dec_state *state;
622 +       struct rpivid_dec_env *dec0;
623 +
624 +       /* Spinlock protecting dec_free */
625 +       spinlock_t dec_lock;
626 +       struct rpivid_dec_env *dec_free;
627 +
628 +       struct rpivid_dec_env *dec_pool;
629 +
630 +       /* Some of these should be in dev */
631 +       struct rpivid_gptr bitbufs[1];  /* Will be 2 */
632 +       struct rpivid_gptr cmdbufs[1];  /* Will be 2 */
633 +       unsigned int p2idx;
634 +       atomic_t p2out;
635 +       struct rpivid_gptr pu_bufs[RPIVID_P2BUF_COUNT];
636 +       struct rpivid_gptr coeff_bufs[RPIVID_P2BUF_COUNT];
637 +
638 +       /* Spinlock protecting aux_free */
639 +       spinlock_t aux_lock;
640 +       struct rpivid_q_aux *aux_free;
641 +
642 +       struct rpivid_q_aux *aux_ents[RPIVID_AUX_ENT_COUNT];
643 +
644 +       unsigned int colmv_stride;
645 +       unsigned int colmv_picsize;
646 +};
647 +
    +/*
    + * Codec-specific operations. rpivid_device_run() calls setup() with the
    + * prepared run and then trigger().
    + * NOTE(review): the callers of start()/stop() are not in this file;
    + * presumably the streaming start/stop path - confirm.
    + */
648 +struct rpivid_dec_ops {
649 +       void (*setup)(struct rpivid_ctx *ctx, struct rpivid_run *run);
650 +       int (*start)(struct rpivid_ctx *ctx);
651 +       void (*stop)(struct rpivid_ctx *ctx);
652 +       void (*trigger)(struct rpivid_ctx *ctx);
653 +};
654 +
    +/*
    + * Per-hardware-variant description.
    + * NOTE(review): no user is visible here; capabilities/quirks presumably
    + * hold RPIVID_CAPABILITY_* / RPIVID_QUIRK_* bits - confirm.
    + */
655 +struct rpivid_variant {
656 +       unsigned int    capabilities;
657 +       unsigned int    quirks;
658 +       unsigned int    mod_rate;
659 +};
660 +
661 +struct rpivid_hw_irq_ent;
662 +
    +/*
    + * Bookkeeping for one hardware interrupt source; struct rpivid_dev holds
    + * two instances (ic_active1 and ic_active2).
    + */
663 +struct rpivid_hw_irq_ctrl {
664 +       /* Spinlock protecting claim and tail */
665 +       spinlock_t lock;
666 +       struct rpivid_hw_irq_ent *claim;
667 +       struct rpivid_hw_irq_ent *tail;
668 +
669 +       /* Ent for pending irq - also prevents sched */
670 +       struct rpivid_hw_irq_ent *irq;
671 +       /* Non-zero => do not start a new job - outer layer sched pending */
672 +       int no_sched;
673 +       /* Thread CB requested */
674 +       bool thread_reqed;
675 +};
676 +
    +/*
    + * Per-device driver state, allocated with devm_kzalloc() in rpivid_probe()
    + * and stored as both the platform and the video device drvdata.
    + */
677 +struct rpivid_dev {
678 +       struct v4l2_device      v4l2_dev;
    +       /* Copy of the rpivid_video_device template, registered in probe */
679 +       struct video_device     vfd;
680 +       struct media_device     mdev;
681 +       struct media_pad        pad[2];
682 +       struct platform_device  *pdev;
683 +       struct device           *dev;
684 +       struct v4l2_m2m_dev     *m2m_dev;
    +       /* Codec operations; probe points this at rpivid_dec_ops_h265 */
685 +       struct rpivid_dec_ops   *dec_ops;
686 +
687 +       /* Device file mutex */
688 +       struct mutex            dev_mutex;
689 +
    +       /* NOTE(review): presumably mapped by rpivid_hw_probe() - confirm */
690 +       void __iomem            *base_irq;
691 +       void __iomem            *base_h265;
692 +
693 +       struct clk              *clock;
694 +
695 +       struct rpivid_hw_irq_ctrl ic_active1;
696 +       struct rpivid_hw_irq_ctrl ic_active2;
697 +};
698 +
699 +extern struct rpivid_dec_ops rpivid_dec_ops_h265;
700 +
701 +void *rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id);
702 +
703 +#endif
704 --- /dev/null
705 +++ b/drivers/staging/media/rpivid/rpivid_dec.c
706 @@ -0,0 +1,79 @@
707 +// SPDX-License-Identifier: GPL-2.0
708 +/*
709 + * Raspberry Pi HEVC driver
710 + *
711 + * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
712 + *
713 + * Based on the Cedrus VPU driver, that is:
714 + *
715 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
716 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
717 + * Copyright (C) 2018 Bootlin
718 + */
719 +
720 +#include <media/v4l2-device.h>
721 +#include <media/v4l2-ioctl.h>
722 +#include <media/v4l2-event.h>
723 +#include <media/v4l2-mem2mem.h>
724 +
725 +#include "rpivid.h"
726 +#include "rpivid_dec.h"
727 +
728 +/*
729 + * rpivid_device_run() - mem2mem device_run callback
730 + * @priv: the struct rpivid_ctx registered with the mem2mem context
731 + *
732 + * Picks the next source and destination buffers, applies the controls of
733 + * the source buffer's request (if any), gathers the HEVC control payloads,
734 + * copies the buffer metadata and hands the run to the codec's setup() and
735 + * trigger() operations. The request's controls are completed between the
736 + * two calls.
737 + */
738 +void rpivid_device_run(void *priv)
739 +{
740 +       struct rpivid_ctx *ctx = priv;
741 +       struct rpivid_dev *dev = ctx->dev;
742 +       struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
743 +       struct rpivid_run run = {};
744 +       struct media_request *req;
745 +
746 +       run.src = v4l2_m2m_next_src_buf(m2m_ctx);
747 +       run.dst = v4l2_m2m_next_dst_buf(m2m_ctx);
748 +
749 +       if (!(run.src && run.dst)) {
750 +               v4l2_err(&dev->v4l2_dev, "%s: Missing buffer: src=%p, dst=%p\n",
751 +                        __func__, run.src, run.dst);
752 +               /* Nothing can be decoded; fail the job rather than
753 +                * leave it hanging
754 +                */
755 +               v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, m2m_ctx,
756 +                                                VB2_BUF_STATE_ERROR);
757 +               return;
758 +       }
759 +
760 +       /* Controls attached to the source buffer's request, if there is one */
761 +       req = run.src->vb2_buf.req_obj.req;
762 +       if (req)
763 +               v4l2_ctrl_request_setup(req, &ctx->hdl);
764 +
765 +       if (ctx->src_fmt.pixelformat == V4L2_PIX_FMT_HEVC_SLICE) {
766 +               struct rpivid_h265_run *h265 = &run.h265;
767 +
768 +               h265->sps = rpivid_find_control_data(ctx,
769 +                               V4L2_CID_MPEG_VIDEO_HEVC_SPS);
770 +               h265->pps = rpivid_find_control_data(ctx,
771 +                               V4L2_CID_MPEG_VIDEO_HEVC_PPS);
772 +               h265->slice_params = rpivid_find_control_data(ctx,
773 +                               V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS);
774 +               h265->scaling_matrix = rpivid_find_control_data(ctx,
775 +                               V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX);
776 +       }
777 +
778 +       v4l2_m2m_buf_copy_metadata(run.src, run.dst, true);
779 +
780 +       dev->dec_ops->setup(ctx, &run);
781 +
782 +       /* The request's controls are finished with once setup has run */
783 +       if (req)
784 +               v4l2_ctrl_request_complete(req, &ctx->hdl);
785 +
786 +       dev->dec_ops->trigger(ctx);
787 +}
786 --- /dev/null
787 +++ b/drivers/staging/media/rpivid/rpivid_dec.h
788 @@ -0,0 +1,19 @@
789 +/* SPDX-License-Identifier: GPL-2.0 */
790 +/*
791 + * Raspberry Pi HEVC driver
792 + *
793 + * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
794 + *
795 + * Based on the Cedrus VPU driver, that is:
796 + *
797 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
798 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
799 + * Copyright (C) 2018 Bootlin
800 + */
801 +
802 +#ifndef _RPIVID_DEC_H_
803 +#define _RPIVID_DEC_H_
804 +
805 +void rpivid_device_run(void *priv);
806 +
807 +#endif
808 --- /dev/null
809 +++ b/drivers/staging/media/rpivid/rpivid_h265.c
810 @@ -0,0 +1,2275 @@
811 +// SPDX-License-Identifier: GPL-2.0-or-later
812 +/*
813 + * Raspberry Pi HEVC driver
814 + *
815 + * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
816 + *
817 + * Based on the Cedrus VPU driver, that is:
818 + *
819 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
820 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
821 + * Copyright (C) 2018 Bootlin
822 + */
823 +
824 +#include <linux/delay.h>
825 +#include <linux/types.h>
826 +
827 +#include <media/videobuf2-dma-contig.h>
828 +
829 +#include "rpivid.h"
830 +#include "rpivid_hw.h"
831 +
    +/* NOTE(review): debug switch; its users are further down the file - confirm */
832 +#define DEBUG_TRACE_P1_CMD 0
    +/* Set non-zero to make the xtrace_*() macros below log via v4l2_info() */
833 +#define DEBUG_TRACE_EXECUTION 0
834 +
    +/*
    + * Execution tracing. When enabled each macro logs the calling function name
    + * together with (de_)->decode_order, or -1 when de_ is NULL, followed by
    + * "in", "ok", "finish" or "FAIL". When disabled they expand to nothing.
    + */
835 +#if DEBUG_TRACE_EXECUTION
836 +#define xtrace_in(dev_, de_)\
837 +       v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: in\n",   __func__,\
838 +                 (de_) == NULL ? -1 : (de_)->decode_order)
839 +#define xtrace_ok(dev_, de_)\
840 +       v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: ok\n",   __func__,\
841 +                 (de_) == NULL ? -1 : (de_)->decode_order)
842 +#define xtrace_fin(dev_, de_)\
843 +       v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: finish\n", __func__,\
844 +                 (de_) == NULL ? -1 : (de_)->decode_order)
845 +#define xtrace_fail(dev_, de_)\
846 +       v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: FAIL\n", __func__,\
847 +                 (de_) == NULL ? -1 : (de_)->decode_order)
848 +#else
849 +#define xtrace_in(dev_, de_)
850 +#define xtrace_ok(dev_, de_)
851 +#define xtrace_fin(dev_, de_)
852 +#define xtrace_fail(dev_, de_)
853 +#endif
854 +
    +/*
    + * Slice types, numbered B = 0, P = 1, I = 2.
    + * NOTE(review): presumably compared against the slice_type field of the
    + * slice parameters control - confirm at the point of use.
    + */
855 +enum hevc_slice_type {
856 +       HEVC_SLICE_B = 0,
857 +       HEVC_SLICE_P = 1,
858 +       HEVC_SLICE_I = 2,
859 +};
860 +
    +/* NOTE(review): presumably indexes reference picture lists 0 and 1 - confirm */
861 +enum hevc_layer { L0 = 0, L1 = 1 };
862 +
863 +/*
864 + * gptr_alloc() - allocate a DMA buffer and describe it in @gptr
865 + * @dev:   device owning the allocation (dev->dev is passed to the DMA API)
866 + * @gptr:  descriptor to fill in; any previous contents are overwritten
867 + * @size:  number of bytes to allocate
868 + * @attrs: DMA attributes for dma_alloc_attrs()
869 + *
870 + * Return: 0 on success, -ENOMEM if the allocation failed (gptr->ptr is then
871 + * NULL while size and attrs still hold the requested values).
872 + */
873 +static int gptr_alloc(struct rpivid_dev *const dev, struct rpivid_gptr *gptr,
874 +                     size_t size, unsigned long attrs)
875 +{
876 +       dma_addr_t dma = 0;
877 +       void *va;
878 +
879 +       va = dma_alloc_attrs(dev->dev, size, &dma, GFP_KERNEL, attrs);
880 +
881 +       gptr->size = size;
882 +       gptr->attrs = attrs;
883 +       gptr->addr = dma;
884 +       gptr->ptr = va;
885 +
886 +       if (!va)
887 +               return -ENOMEM;
888 +
889 +       return 0;
890 +}
873 +
874 +/*
875 + * gptr_free() - release a DMA buffer and clear its descriptor
876 + * @dev:  device the buffer was allocated for
877 + * @gptr: descriptor to release; harmless if nothing is allocated
878 + *
879 + * The descriptor is zeroed in every case, so it can be reused or freed
880 + * again afterwards.
881 + */
882 +static void gptr_free(struct rpivid_dev *const dev,
883 +                     struct rpivid_gptr *const gptr)
884 +{
885 +       __u8 *const va = gptr->ptr;
886 +
887 +       if (va != NULL)
888 +               dma_free_attrs(dev->dev, gptr->size, va, gptr->addr,
889 +                              gptr->attrs);
890 +
891 +       gptr->attrs = 0;
892 +       gptr->addr = 0;
893 +       gptr->ptr = NULL;
894 +       gptr->size = 0;
895 +}
885 +
886 +/*
887 + * gptr_realloc_new() - resize a DMA buffer without preserving its contents
888 + * @dev:  device the buffer belongs to
889 + * @gptr: descriptor to resize; its attrs are reused for the new allocation
890 + * @size: new size in bytes
891 + *
892 + * Nothing is done when @size already matches. Otherwise the old buffer is
893 + * freed and a new one allocated. If that allocation fails the descriptor is
894 + * left describing an empty buffer (ptr NULL, size 0), so a later call with
895 + * the same @size allocates again instead of reporting success with no
896 + * memory behind it.
897 + *
898 + * Return: 0 on success, -ENOMEM if the new allocation failed.
899 + */
900 +static int gptr_realloc_new(struct rpivid_dev * const dev,
901 +                           struct rpivid_gptr * const gptr, size_t size)
902 +{
903 +       if (size == gptr->size)
904 +               return 0;
905 +
906 +       if (gptr->ptr)
907 +               dma_free_attrs(dev->dev, gptr->size, gptr->ptr,
908 +                              gptr->addr, gptr->attrs);
909 +
910 +       gptr->addr = 0;
911 +       gptr->size = size;
912 +       gptr->ptr = dma_alloc_attrs(dev->dev, gptr->size,
913 +                                   &gptr->addr, GFP_KERNEL, gptr->attrs);
914 +       if (!gptr->ptr) {
915 +               /* Keep size consistent with the missing buffer */
916 +               gptr->size = 0;
917 +               return -ENOMEM;
918 +       }
919 +
920 +       return 0;
921 +}
903 +
904 +/*
905 + * log2_size() - floor(log2(x))
906 + * @x: value to take the logarithm of
907 + *
908 + * Finds the index of the highest set bit by halving the search window,
909 + * starting at half the width of size_t so that the full range of the type
910 + * is covered on both 32-bit and 64-bit builds.
911 + *
912 + * Return: index of the highest set bit; 0 for both x == 0 and x == 1.
913 + */
914 +static unsigned int log2_size(size_t x)
915 +{
916 +       unsigned int n = 0;
917 +       unsigned int shift;
918 +
919 +       for (shift = sizeof(x) * 4; shift != 0; shift >>= 1) {
920 +               if (x >> shift) {
921 +                       n += shift;
922 +                       x >>= shift;
923 +               }
924 +       }
925 +
926 +       return n;
927 +}
927 +
928 +static size_t round_up_size(const size_t x)
929 +{
930 +       /* Admit no size < 256; shift as size_t so big sizes can't overflow */
931 +       const unsigned int n = x < 256 ? 8 : log2_size(x) - 1;
932 +       const size_t lo = (size_t)3 << n; /* result is 3 << n or 4 << n */
933 +       return x >= lo ? (size_t)4 << n : lo;
934 +}
935 +
936 +static size_t next_size(const size_t x)
937 +{ /* Returns round_up_size() applied to x + 1 */
938 +       return round_up_size(x + 1);
939 +}
940 +
941 +#define NUM_SCALING_FACTORS 4064 /* Not a typo = 0xbe0 + 0x400 */
942 +
943 +#define AXI_BASE64 0
944 +/* NOTE(review): presumably three packed 6-bit fields (20, 20, 0) - confirm against h/w docs */
945 +#define PROB_BACKUP ((20 << 12) + (20 << 6) + (0 << 0))
946 +#define PROB_RELOAD ((20 << 12) + (20 << 0) + (0 << 6))
947 +/* Sizes the per-reference arrays below (ref_aux[], slice_msgs[]) */
948 +#define HEVC_MAX_REFS V4L2_HEVC_DPB_ENTRIES_NUM_MAX
949 +
950 +//////////////////////////////////////////////////////////////////////////////
951 +
952 +struct rpi_cmd { /* one queued command, filled in by p1_apb_write() */
953 +       u32 addr; /* target address; p1_apb_write() receives it as u16 */
954 +       u32 data; /* 32-bit payload */
955 +} __packed;
956 +
957 +struct rpivid_q_aux {
958 +       unsigned int refcount; /* changed under ctx->aux_lock; reaching 0 parks the entry on ctx->aux_free */
959 +       unsigned int q_index; /* slot in ctx->aux_ents[] that points at this entry */
960 +       struct rpivid_q_aux *next; /* link for the ctx->aux_free list */
961 +       struct rpivid_gptr col; /* DMA buffer allocated with ctx->colmv_picsize bytes */
962 +};
963 +
964 +//////////////////////////////////////////////////////////////////////////////
965 +
966 +enum rpivid_decode_state { /* values held in rpivid_dec_env.state */
967 +       RPIVID_DECODE_SLICE_START,
968 +       RPIVID_DECODE_SLICE_CONTINUE,
969 +       RPIVID_DECODE_ERROR_CONTINUE,
970 +       RPIVID_DECODE_ERROR_DONE,
971 +       RPIVID_DECODE_PHASE1,
972 +       RPIVID_DECODE_END,
973 +};
974 +
975 +struct rpivid_dec_env {
976 +       struct rpivid_ctx *ctx;
977 +       struct rpivid_dec_env *next;
978 +
979 +       enum rpivid_decode_state state;
980 +       unsigned int decode_order;
981 +       int p1_status;          /* P1 status - what to realloc */
982 +
983 +       struct rpivid_dec_env *phase_wait_q_next;
984 +
985 +       struct rpi_cmd *cmd_fifo; /* command array; grown by p1_apb_write() when full */
986 +       unsigned int cmd_len, cmd_max; /* entries used / allocated in cmd_fifo */
987 +       unsigned int num_slice_msgs; /* entries used in slice_msgs[] */
988 +       unsigned int pic_width_in_ctbs_y;
989 +       unsigned int pic_height_in_ctbs_y;
990 +       unsigned int dpbno_col; /* DPB index picked via collocated_ref_idx in pre_slice_decode() */
991 +       u32 reg_slicestart; /* last RPI_SLICESTART value: ctb_col + (ctb_row << 16) */
992 +       int collocated_from_l0_flag;
993 +       unsigned int wpp_entry_x;
994 +       unsigned int wpp_entry_y;
995 +
996 +       u32 rpi_config2;
997 +       u32 rpi_framesize;
998 +       u32 rpi_currpoc;
999 +
1000 +       struct vb2_v4l2_buffer *frame_buf; // Detached dest buffer
1001 +       unsigned int frame_c_offset;
1002 +       unsigned int frame_stride;
1003 +       dma_addr_t frame_addr;
1004 +       dma_addr_t ref_addrs[16];
1005 +       struct rpivid_q_aux *frame_aux;
1006 +       struct rpivid_q_aux *col_aux;
1007 +
1008 +       dma_addr_t pu_base_vc;
1009 +       dma_addr_t coeff_base_vc;
1010 +       u32 pu_stride;
1011 +       u32 coeff_stride;
1012 +
1013 +       struct rpivid_gptr *bit_copy_gptr; /* destination for bitstream copies made by write_bitstream() */
1014 +       size_t bit_copy_len; /* bytes of bit_copy_gptr consumed so far */
1015 +       struct rpivid_gptr *cmd_copy_gptr;
1016 +
1017 +       u16 slice_msgs[2 * HEVC_MAX_REFS * 8 + 3]; /* filled by msg_slice(), sent by program_slicecmds() */
1018 +       u8 scaling_factors[NUM_SCALING_FACTORS]; /* sent by write_scaling_factors() */
1019 +
1020 +       struct rpivid_hw_irq_ent irq_ent;
1021 +};
1022 +
1023 +#define member_size(type, member) sizeof(((type *)0)->member) /* size of a struct member, no instance needed */
1024 +
1025 +struct rpivid_dec_state {
1026 +       struct v4l2_ctrl_hevc_sps sps;
1027 +       struct v4l2_ctrl_hevc_pps pps;
1028 +
1029 +       // Helper vars & tables derived from sps/pps
1030 +       unsigned int log2_ctb_size; /* log2 width of a CTB */
1031 +       unsigned int ctb_width; /* Width in CTBs */
1032 +       unsigned int ctb_height; /* Height in CTBs */
1033 +       unsigned int ctb_size; /* Pic area in CTBs */
1034 +       unsigned int num_tile_columns;
1035 +       unsigned int num_tile_rows;
1036 +       u8 column_width[member_size(struct v4l2_ctrl_hevc_pps,
1037 +                                   column_width_minus1)];
1038 +       u8 row_height[member_size(struct v4l2_ctrl_hevc_pps,
1039 +                                 row_height_minus1)];
1040 +
1041 +       int *col_bd; /* tile column boundaries in CTBs: tile t spans [col_bd[t], col_bd[t + 1]) */
1042 +       int *row_bd; /* tile row boundaries in CTBs, same layout as col_bd */
1043 +       int *ctb_addr_rs_to_ts;
1044 +       int *ctb_addr_ts_to_rs; /* indexed by tile-scan address; value is split into row/col by picture width */
1045 +       int *tile_id;
1046 +
1047 +       // Aux storage for DPB
1048 +       // Hold refs
1049 +       struct rpivid_q_aux *ref_aux[HEVC_MAX_REFS];
1050 +       struct rpivid_q_aux *frame_aux;
1051 +
1052 +       // Slice vars
1053 +       unsigned int slice_idx;
1054 +       bool frame_end;
1055 +       bool slice_temporal_mvp;  /* Slice flag but constant for frame */
1056 +
1057 +       // Temp vars per run - don't actually need to persist
1058 +       u8 *src_buf;
1059 +       dma_addr_t src_addr; /* 0 => write_bitstream() copies from src_buf instead */
1060 +       const struct v4l2_ctrl_hevc_slice_params *sh;
1061 +       unsigned int nb_refs[2]; /* indexed by L0 / L1 */
1062 +       unsigned int slice_qp;
1063 +       unsigned int max_num_merge_cand; // 0 if I-slice
1064 +       bool dependent_slice_segment_flag;
1065 +};
1066 +
1067 +static inline int clip_int(const int x, const int lo, const int hi)
1068 +{ /* Clamp x to the range lo..hi; the lower bound is tested first */
1069 +       return x < lo ? lo : (x <= hi ? x : hi);
1070 +}
1071 +
1072 +//////////////////////////////////////////////////////////////////////////////
1073 +// Phase 1 command and bit FIFOs
1074 +
1075 +#if DEBUG_TRACE_P1_CMD
1076 +static int p1_z; /* commands seen by p1_apb_write(); tracing stops once this reaches 256 */
1077 +#endif
1078 +
1079 +// ???? u16 addr - put in u32
1080 +static int p1_apb_write(struct rpivid_dec_env *const de, const u16 addr,
1081 +                       const u32 data)
1082 +{
1083 +       if (de->cmd_len == de->cmd_max)
1084 +               de->cmd_fifo =
1085 +                       krealloc(de->cmd_fifo,
1086 +                                (de->cmd_max *= 2) * sizeof(struct rpi_cmd),
1087 +                                GFP_KERNEL);
1088 +       de->cmd_fifo[de->cmd_len].addr = addr;
1089 +       de->cmd_fifo[de->cmd_len].data = data;
1090 +
1091 +#if DEBUG_TRACE_P1_CMD
1092 +       if (++p1_z < 256) {
1093 +               v4l2_info(&de->ctx->dev->v4l2_dev, "[%02x] %x %x\n",
1094 +                         de->cmd_len, addr, data);
1095 +       }
1096 +#endif
1097 +
1098 +       return de->cmd_len++;
1099 +}
1100 +
1101 +static int ctb_to_tile(unsigned int ctb, unsigned int *bd, int num)
1102 +{
1103 +       int i;
1104 +       /* bd[] has num+1 elements, bd[0]=0; returns the tile index holding ctb */
1105 +       for (i = 1; i < num && ctb >= bd[i]; i++)
1106 +               ; /* i < num: an out-of-range ctb clamps to the last tile */
1107 +       return i - 1;
1108 +}
1109 +
1110 +static int ctb_to_slice_w_h(unsigned int ctb, int ctb_size, int width,
1111 +                           unsigned int *bd, int num)
1112 +{
1113 +       int rem;
1114 +       /* CTBs before the last boundary bd[num - 1] are always full size */
1115 +       if (ctb < bd[num - 1])
1116 +               return ctb_size;
1117 +       rem = width % ctb_size; /* leftover samples, 0 if width divides */
1118 +       return rem ? rem : ctb_size;
1119 +}
1120 +
1121 +// Destroy one aux entry: release its col DMA buffer, then free the
1122 +// entry itself.
1123 +static void aux_q_free(struct rpivid_ctx *const ctx,
1124 +                      struct rpivid_q_aux *const aq)
1125 +{
1126 +       gptr_free(ctx->dev, &aq->col);
1127 +       kfree(aq);
1128 +}
1129 +
1130 +/*
1131 + * Allocate a fresh aux entry with refcount 1 and a col buffer of
1132 + * ctx->colmv_picsize bytes.  Returns NULL if either allocation fails.
1133 + */
1134 +static struct rpivid_q_aux *aux_q_alloc(struct rpivid_ctx *const ctx)
1135 +{
1136 +       const unsigned long attrs =
1137 +               DMA_ATTR_FORCE_CONTIGUOUS | DMA_ATTR_NO_KERNEL_MAPPING;
1138 +       struct rpivid_q_aux *aq = kzalloc(sizeof(*aq), GFP_KERNEL);
1139 +
1140 +       if (aq == NULL)
1141 +               return NULL;
1142 +       aq->refcount = 1;
1143 +       if (gptr_alloc(ctx->dev, &aq->col, ctx->colmv_picsize, attrs) != 0) {
1144 +               kfree(aq);      /* gptr_alloc left no buffer to release */
1145 +               aq = NULL;
1146 +       }
1147 +       return aq;
1148 +}
1149 +
1150 +/* Reuse a free-list entry for slot @q_index, else allocate; NULL on failure */
1151 +static struct rpivid_q_aux *aux_q_new(struct rpivid_ctx *const ctx,
1152 +                                     const unsigned int q_index)
1153 +{
1154 +       struct rpivid_q_aux *ent;
1155 +       unsigned long flags;
1156 +
1157 +       /* Pop the list head under aux_lock */
1158 +       spin_lock_irqsave(&ctx->aux_lock, flags);
1159 +       ent = ctx->aux_free;
1160 +       if (ent != NULL) {
1161 +               ctx->aux_free = ent->next;
1162 +               ent->next = NULL;
1163 +               ent->refcount = 1; /* recycled entries restart at one ref */
1164 +       }
1165 +       spin_unlock_irqrestore(&ctx->aux_lock, flags);
1166 +
1167 +       if (ent == NULL)
1168 +               ent = aux_q_alloc(ctx);
1169 +       if (ent == NULL)
1170 +               return NULL;
1171 +       ent->q_index = q_index;
1172 +       ctx->aux_ents[q_index] = ent;
1173 +       return ent;
1174 +}
1175 +
1176 +/* Take an extra reference on @aq (NULL is passed straight through) */
1177 +static struct rpivid_q_aux *aux_q_ref(struct rpivid_ctx *const ctx,
1178 +                                     struct rpivid_q_aux *const aq)
1179 +{
1180 +       unsigned long flags;
1181 +
1182 +       if (aq == NULL)
1183 +               return NULL;
1184 +
1185 +       spin_lock_irqsave(&ctx->aux_lock, flags);
1186 +       aq->refcount++;
1187 +       spin_unlock_irqrestore(&ctx->aux_lock, flags);
1188 +       return aq;
1189 +}
1190 +
1191 +/* Drop the ref held in *@paq and NULL the caller's pointer */
1192 +static void aux_q_release(struct rpivid_ctx *const ctx,
1193 +                         struct rpivid_q_aux **const paq)
1194 +{
1195 +       struct rpivid_q_aux *const ent = *paq;
1196 +       unsigned long flags;
1197 +
1198 +       *paq = NULL;
1199 +       if (ent == NULL)
1200 +               return;
1201 +
1202 +       spin_lock_irqsave(&ctx->aux_lock, flags);
1203 +       /* Last ref gone: vacate the slot and park the entry for reuse */
1204 +       if (--ent->refcount == 0) {
1205 +               ctx->aux_ents[ent->q_index] = NULL;
1206 +               ent->next = ctx->aux_free;
1207 +               ctx->aux_free = ent;
1208 +       }
1209 +       spin_unlock_irqrestore(&ctx->aux_lock, flags);
1210 +}
1211 +
1212 +static void aux_q_init(struct rpivid_ctx *const ctx)
1213 +{ /* Start with an empty free list and an initialised aux_lock */
1214 +       ctx->aux_free = NULL;
1215 +       spin_lock_init(&ctx->aux_lock);
1216 +}
1217 +
1218 +static void aux_q_uninit(struct rpivid_ctx *const ctx)
1219 +{
1220 +       struct rpivid_q_aux *head = ctx->aux_free;
1221 +       /* Zero the colmv geometry and destroy everything on the free list */
1222 +       ctx->colmv_stride = 0;
1223 +       ctx->colmv_picsize = 0;
1224 +       for (; head != NULL; head = ctx->aux_free) {
1225 +               ctx->aux_free = head->next; /* unlink before destroying */
1226 +               aux_q_free(ctx, head);
1227 +       }
1228 +}
1229 +
1230 +//////////////////////////////////////////////////////////////////////////////
1231 +
1232 +/*
1233 + * Initialisation process for context variables (CABAC init)
1234 + * see H.265 9.3.2.2
1235 + *
1236 + * N.B. If comparing with FFmpeg note that this h/w uses slightly different
1237 + * offsets to FFmpeg's array
1238 + */
1239 +
1240 +/* Actual number of values */
1241 +#define RPI_PROB_VALS 154U
1242 +/* Rounded up to a multiple of 4 (156) as we copy 32-bit words */
1243 +#define RPI_PROB_ARRAY_SIZE ((154 + 3) & ~3)
1244 +
1245 +/* Initialiser values - see tables H.265 9-4 through 9-42; one row per init type, last 2 entries of each row are padding */
1246 +static const u8 prob_init[3][156] = {
1247 +       {
1248 +               153, 200, 139, 141, 157, 154, 154, 154, 154, 154, 184, 154, 154,
1249 +               154, 184, 63,  154, 154, 154, 154, 154, 154, 154, 154, 154, 154,
1250 +               154, 154, 154, 153, 138, 138, 111, 141, 94,  138, 182, 154, 154,
1251 +               154, 140, 92,  137, 138, 140, 152, 138, 139, 153, 74,  149, 92,
1252 +               139, 107, 122, 152, 140, 179, 166, 182, 140, 227, 122, 197, 110,
1253 +               110, 124, 125, 140, 153, 125, 127, 140, 109, 111, 143, 127, 111,
1254 +               79,  108, 123, 63,  110, 110, 124, 125, 140, 153, 125, 127, 140,
1255 +               109, 111, 143, 127, 111, 79,  108, 123, 63,  91,  171, 134, 141,
1256 +               138, 153, 136, 167, 152, 152, 139, 139, 111, 111, 125, 110, 110,
1257 +               94,  124, 108, 124, 107, 125, 141, 179, 153, 125, 107, 125, 141,
1258 +               179, 153, 125, 107, 125, 141, 179, 153, 125, 140, 139, 182, 182,
1259 +               152, 136, 152, 136, 153, 136, 139, 111, 136, 139, 111, 0,   0,
1260 +       },
1261 +       {
1262 +               153, 185, 107, 139, 126, 197, 185, 201, 154, 149, 154, 139, 154,
1263 +               154, 154, 152, 110, 122, 95,  79,  63,  31,  31,  153, 153, 168,
1264 +               140, 198, 79,  124, 138, 94,  153, 111, 149, 107, 167, 154, 154,
1265 +               154, 154, 196, 196, 167, 154, 152, 167, 182, 182, 134, 149, 136,
1266 +               153, 121, 136, 137, 169, 194, 166, 167, 154, 167, 137, 182, 125,
1267 +               110, 94,  110, 95,  79,  125, 111, 110, 78,  110, 111, 111, 95,
1268 +               94,  108, 123, 108, 125, 110, 94,  110, 95,  79,  125, 111, 110,
1269 +               78,  110, 111, 111, 95,  94,  108, 123, 108, 121, 140, 61,  154,
1270 +               107, 167, 91,  122, 107, 167, 139, 139, 155, 154, 139, 153, 139,
1271 +               123, 123, 63,  153, 166, 183, 140, 136, 153, 154, 166, 183, 140,
1272 +               136, 153, 154, 166, 183, 140, 136, 153, 154, 170, 153, 123, 123,
1273 +               107, 121, 107, 121, 167, 151, 183, 140, 151, 183, 140, 0,   0,
1274 +       },
1275 +       {
1276 +               153, 160, 107, 139, 126, 197, 185, 201, 154, 134, 154, 139, 154,
1277 +               154, 183, 152, 154, 137, 95,  79,  63,  31,  31,  153, 153, 168,
1278 +               169, 198, 79,  224, 167, 122, 153, 111, 149, 92,  167, 154, 154,
1279 +               154, 154, 196, 167, 167, 154, 152, 167, 182, 182, 134, 149, 136,
1280 +               153, 121, 136, 122, 169, 208, 166, 167, 154, 152, 167, 182, 125,
1281 +               110, 124, 110, 95,  94,  125, 111, 111, 79,  125, 126, 111, 111,
1282 +               79,  108, 123, 93,  125, 110, 124, 110, 95,  94,  125, 111, 111,
1283 +               79,  125, 126, 111, 111, 79,  108, 123, 93,  121, 140, 61,  154,
1284 +               107, 167, 91,  107, 107, 167, 139, 139, 170, 154, 139, 153, 139,
1285 +               123, 123, 63,  124, 166, 183, 140, 136, 153, 154, 166, 183, 140,
1286 +               136, 153, 154, 166, 183, 140, 136, 153, 154, 170, 153, 138, 138,
1287 +               122, 121, 122, 121, 167, 151, 183, 140, 151, 183, 140, 0,   0,
1288 +       },
1289 +};
1290 +
1291 +static void write_prob(struct rpivid_dec_env *const de,
1292 +                      const struct rpivid_dec_state *const s)
1293 +{ /* Derive the CABAC init values for this slice's QP and queue them from 0x1000 */
1294 +       u8 dst[RPI_PROB_ARRAY_SIZE];
1295 +       /* Row of prob_init[]: chosen from the slice type and the CABAC_INIT flag */
1296 +       const unsigned int init_type =
1297 +               ((s->sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT) != 0 &&
1298 +                s->sh->slice_type != HEVC_SLICE_I) ?
1299 +                       s->sh->slice_type + 1 :
1300 +                       2 - s->sh->slice_type;
1301 +       const u8 *p = prob_init[init_type]; /* NOTE(review): init_type is not range-checked here; presumably slice_type is validated earlier - confirm */
1302 +       const int q = clip_int(s->slice_qp, 0, 51);
1303 +       unsigned int i;
1304 +       /* High nibble of each init value gives m, low nibble gives n (cf. H.265 9.3.2.2) */
1305 +       for (i = 0; i < RPI_PROB_VALS; i++) {
1306 +               int init_value = p[i];
1307 +               int m = (init_value >> 4) * 5 - 45;
1308 +               int n = ((init_value & 15) << 3) - 16;
1309 +               int pre = 2 * (((m * q) >> 4) + n) - 127;
1310 +               /* Fold negatives to ~pre, then cap at 124/125 keeping bit 0 */
1311 +               pre ^= pre >> 31;
1312 +               if (pre > 124)
1313 +                       pre = 124 + (pre & 1);
1314 +               dst[i] = pre;
1315 +       }
1316 +       for (i = RPI_PROB_VALS; i != RPI_PROB_ARRAY_SIZE; ++i)
1317 +               dst[i] = 0;
1318 +       /* Emit as 32-bit words, dst[i] in the low byte */
1319 +       for (i = 0; i < RPI_PROB_ARRAY_SIZE; i += 4)
1320 +               p1_apb_write(de, 0x1000 + i,
1321 +                            dst[i] + (dst[i + 1] << 8) + (dst[i + 2] << 16) +
1322 +                                    (dst[i + 3] << 24));
1323 +}
1324 +
1325 +static void write_scaling_factors(struct rpivid_dec_env *const de)
1326 +{
1327 +       const u8 *const p = de->scaling_factors;
1328 +       unsigned int i;
1329 +       /* 4 factors per write, low byte first; u32 cast keeps << 24 unsigned */
1330 +       for (i = 0; i < NUM_SCALING_FACTORS; i += 4)
1331 +               p1_apb_write(de, 0x2000 + i, p[i] | (p[i + 1] << 8) |
1332 +                            (p[i + 2] << 16) | ((u32)p[i + 3] << 24));
1333 +}
1334 +
1335 +static inline __u32 dma_to_axi_addr(dma_addr_t a)
1336 +{ /* Address in 64-byte units (a >> 6), truncated to 32 bits */
1337 +       return (__u32)(a >> 6);
1338 +}
1339 +
1340 +static void write_bitstream(struct rpivid_dec_env *const de,
1341 +                           const struct rpivid_dec_state *const s)
1342 +{
1343 +       // Note that FFmpeg removes emulation prevention bytes, so this is
1344 +       // matched in the configuration here.
1345 +       // Whether that is the correct behaviour or not is not clear in the
1346 +       // spec.
1347 +       const int rpi_use_emu = 1;
1348 +       unsigned int offset = s->sh->data_bit_offset / 8 + 1;
1349 +       const unsigned int len = (s->sh->bit_size + 7) / 8 - offset; /* NOTE(review): wraps if offset exceeds the byte size - confirm inputs are validated */
1350 +       dma_addr_t addr;
1351 +       /* Use the source DMA address directly when there is one, else copy the data */
1352 +       if (s->src_addr != 0) {
1353 +               addr = s->src_addr + offset;
1354 +       } else {
1355 +               memcpy(de->bit_copy_gptr->ptr + de->bit_copy_len, /* NOTE(review): no check against bit_copy_gptr->size is visible here */
1356 +                      s->src_buf + offset, len);
1357 +               addr = de->bit_copy_gptr->addr + de->bit_copy_len;
1358 +               de->bit_copy_len += (len + 63) & ~63; /* advance in whole 64-byte steps */
1359 +       }
1360 +       offset = addr & 63; /* byte position inside the 64-byte unit passed to RPI_BFBASE */
1361 +
1362 +       p1_apb_write(de, RPI_BFBASE, dma_to_axi_addr(addr));
1363 +       p1_apb_write(de, RPI_BFNUM, len);
1364 +       p1_apb_write(de, RPI_BFCONTROL, offset + (1 << 7)); // Stop
1365 +       p1_apb_write(de, RPI_BFCONTROL, offset + (rpi_use_emu << 6));
1366 +}
1367 +
1368 +//////////////////////////////////////////////////////////////////////////////
1369 +
1370 +static void write_slice(struct rpivid_dec_env *const de,
1371 +                       const struct rpivid_dec_state *const s,
1372 +                       const unsigned int slice_w,
1373 +                       const unsigned int slice_h)
1374 +{
1375 +       const struct v4l2_ctrl_hevc_slice_params *const sh = s->sh;
1376 +       const u32 sao_luma =
1377 +               !!(sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA);
1378 +       const u32 sao_chroma =
1379 +               !!(sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA);
1380 +       u32 cmd;
1381 +
1382 +       /* Build the RPI_SLICE word field by field, then queue it */
1383 +       cmd = (sh->slice_type << 12) + (sao_luma << 14) + (sao_chroma << 15) +
1384 +             (slice_w << 17) + (slice_h << 24);
1385 +       cmd |= (s->max_num_merge_cand << 0) + (s->nb_refs[L0] << 4) +
1386 +              (s->nb_refs[L1] << 8);
1387 +       /* The MVD_L1_ZERO flag (bit 16) is only passed on for B slices */
1388 +       if (sh->slice_type == HEVC_SLICE_B &&
1389 +           (sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO) != 0)
1390 +               cmd |= 1 << 16;
1391 +       p1_apb_write(de, RPI_SLICE, cmd);
1392 +}
1393 +
1394 +//////////////////////////////////////////////////////////////////////////////
1395 +// Tiles mode
1396 +
1397 +static void new_entry_point(struct rpivid_dec_env *const de,
1398 +                           const struct rpivid_dec_state *const s,
1399 +                           const int do_bte,
1400 +                           const int reset_qp_y, const int ctb_addr_ts)
1401 +{ /* Queue the command words for an entry point at tile-scan CTB address ctb_addr_ts */
1402 +       int ctb_col = s->ctb_addr_ts_to_rs[ctb_addr_ts] %
1403 +                                                       de->pic_width_in_ctbs_y;
1404 +       int ctb_row = s->ctb_addr_ts_to_rs[ctb_addr_ts] /
1405 +                                                       de->pic_width_in_ctbs_y;
1406 +       /* Which tile holds this CTB, and that tile's last column / row */
1407 +       int tile_x = ctb_to_tile(ctb_col, s->col_bd, s->num_tile_columns);
1408 +       int tile_y = ctb_to_tile(ctb_row, s->row_bd, s->num_tile_rows);
1409 +
1410 +       int endx = s->col_bd[tile_x + 1] - 1;
1411 +       int endy = s->row_bd[tile_y + 1] - 1;
1412 +       /* Full CTB size, or the picture's remainder once in the last tile column / row */
1413 +       u8 slice_w = ctb_to_slice_w_h(ctb_col, 1 << s->log2_ctb_size,
1414 +                                     s->sps.pic_width_in_luma_samples,
1415 +                                     s->col_bd, s->num_tile_columns);
1416 +       u8 slice_h = ctb_to_slice_w_h(ctb_row, 1 << s->log2_ctb_size,
1417 +                                     s->sps.pic_height_in_luma_samples,
1418 +                                     s->row_bd, s->num_tile_rows);
1419 +       /* Tile bounds are packed with x in the low half-word and y from bit 16 */
1420 +       p1_apb_write(de, RPI_TILESTART,
1421 +                    s->col_bd[tile_x] + (s->row_bd[tile_y] << 16));
1422 +       p1_apb_write(de, RPI_TILEEND, endx + (endy << 16));
1423 +
1424 +       if (do_bte)
1425 +               p1_apb_write(de, RPI_BEGINTILEEND, endx + (endy << 16));
1426 +
1427 +       write_slice(de, s, slice_w, slice_h);
1428 +
1429 +       if (reset_qp_y) {
1430 +               unsigned int sps_qp_bd_offset =
1431 +                       6 * s->sps.bit_depth_luma_minus8;
1432 +
1433 +               p1_apb_write(de, RPI_QP, sps_qp_bd_offset + s->slice_qp);
1434 +       }
1435 +       /* Bits 17 / 18 flag that this is the last tile column / row */
1436 +       p1_apb_write(de, RPI_MODE,
1437 +                    (0xFFFF << 0) + (0x0 << 16) +
1438 +                            ((tile_x == s->num_tile_columns - 1) << 17) +
1439 +                            ((tile_y == s->num_tile_rows - 1) << 18));
1440 +
1441 +       p1_apb_write(de, RPI_CONTROL, (ctb_col << 0) + (ctb_row << 16));
1442 +}
1443 +
1444 +//////////////////////////////////////////////////////////////////////////////
1445 +
1446 +static void new_slice_segment(struct rpivid_dec_env *const de,
1447 +                             const struct rpivid_dec_state *const s)
1448 +{
1449 +       const struct v4l2_ctrl_hevc_sps *const sps = &s->sps;
1450 +       const struct v4l2_ctrl_hevc_pps *const pps = &s->pps;
1451 +       /* RPI_SPS0: block-size log2s, bit depths and transform hierarchy depths */
1452 +       p1_apb_write(de,
1453 +                    RPI_SPS0,
1454 +                    ((sps->log2_min_luma_coding_block_size_minus3 + 3) << 0) |
1455 +                    (s->log2_ctb_size << 4) |
1456 +                    ((sps->log2_min_luma_transform_block_size_minus2 + 2)
1457 +                                                       << 8) |
1458 +                    ((sps->log2_min_luma_transform_block_size_minus2 + 2 +
1459 +                      sps->log2_diff_max_min_luma_transform_block_size)
1460 +                                               << 12) |
1461 +                    ((sps->bit_depth_luma_minus8 + 8) << 16) |
1462 +                    ((sps->bit_depth_chroma_minus8 + 8) << 20) |
1463 +                    (sps->max_transform_hierarchy_depth_intra << 24) |
1464 +                    (sps->max_transform_hierarchy_depth_inter << 28));
1465 +       /* RPI_SPS1: PCM parameters, chroma format and SPS tool flags */
1466 +       p1_apb_write(de,
1467 +                    RPI_SPS1,
1468 +                    ((sps->pcm_sample_bit_depth_luma_minus1 + 1) << 0) |
1469 +                    ((sps->pcm_sample_bit_depth_chroma_minus1 + 1) << 4) |
1470 +                    ((sps->log2_min_pcm_luma_coding_block_size_minus3 + 3)
1471 +                                               << 8) |
1472 +                    ((sps->log2_min_pcm_luma_coding_block_size_minus3 + 3 +
1473 +                      sps->log2_diff_max_min_pcm_luma_coding_block_size)
1474 +                                               << 12) |
1475 +                    (((sps->flags & V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE) ?
1476 +                               0 : sps->chroma_format_idc) << 16) |
1477 +                    ((!!(sps->flags & V4L2_HEVC_SPS_FLAG_AMP_ENABLED)) << 18) |
1478 +                    ((!!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED)) << 19) |
1479 +                    ((!!(sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED))
1480 +                                               << 20) |
1481 +                    ((!!(sps->flags &
1482 +                          V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED))
1483 +                                               << 21));
1484 +       /* RPI_PPS: QP-delta depth, PPS flags and the summed PPS + slice chroma QP offsets */
1485 +       p1_apb_write(de,
1486 +                    RPI_PPS,
1487 +                    ((s->log2_ctb_size - pps->diff_cu_qp_delta_depth) << 0) |
1488 +                    ((!!(pps->flags & V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED))
1489 +                                                << 4) |
1490 +                    ((!!(pps->flags &
1491 +                               V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED))
1492 +                                                << 5) |
1493 +                    ((!!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED))
1494 +                                                << 6) |
1495 +                    ((!!(pps->flags &
1496 +                               V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED))
1497 +                                               << 7) |
1498 +                    (((pps->pps_cb_qp_offset + s->sh->slice_cb_qp_offset) & 255)
1499 +                                               << 8) |
1500 +                    (((pps->pps_cr_qp_offset + s->sh->slice_cr_qp_offset) & 255)
1501 +                                               << 16) |
1502 +                    ((!!(pps->flags &
1503 +                               V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED))
1504 +                                               << 24));
1505 +
1506 +       if ((sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) != 0)
1507 +               write_scaling_factors(de);
1508 +       /* A dependent segment re-sends the slice start stored by an earlier independent one */
1509 +       if (!s->dependent_slice_segment_flag) {
1510 +               int ctb_col = s->sh->slice_segment_addr %
1511 +                                                       de->pic_width_in_ctbs_y;
1512 +               int ctb_row = s->sh->slice_segment_addr /
1513 +                                                       de->pic_width_in_ctbs_y;
1514 +
1515 +               de->reg_slicestart = (ctb_col << 0) + (ctb_row << 16);
1516 +       }
1517 +
1518 +       p1_apb_write(de, RPI_SLICESTART, de->reg_slicestart);
1519 +}
1520 +
1521 +//////////////////////////////////////////////////////////////////////////////
1522 +// Slice messages
1523 +
1524 +static void msg_slice(struct rpivid_dec_env *const de, const u16 msg)
1525 +{ /* Append msg to slice_msgs[]. NOTE(review): no bounds check here - presumably callers stay within the array; confirm */
1526 +       de->slice_msgs[de->num_slice_msgs++] = msg;
1527 +}
1528 +
1529 +static void program_slicecmds(struct rpivid_dec_env *const de,
1530 +                             const int sliceid)
1531 +{
1532 +       unsigned int n;
1533 +
1534 +       /* Header word: message count in the low bits, sliceid from bit 8 */
1535 +       p1_apb_write(de, RPI_SLICECMDS, (sliceid << 8) + de->num_slice_msgs);
1536 +       for (n = 0; n < de->num_slice_msgs; ++n)
1537 +               p1_apb_write(de, 0x4000 + n * 4, de->slice_msgs[n] & 0xffff);
1538 +}
1539 +
1540 +// NoBackwardPredictionFlag 8.3.5 - simply checks POCs.
1541 +// Despite the name this returns 1 when no listed ref follows cur_poc.
1542 +static int has_backward(const struct v4l2_hevc_dpb_entry *const dpb,
1543 +                       const __u8 *const idx, const unsigned int n,
1544 +                       const unsigned int cur_poc)
1545 +{
1546 +       const __u8 *const end = idx + n;
1547 +       const __u8 *ref;
1548 +
1549 +       // Compare mod 2^16: we only get u16 pocs & 8.3.1 says
1550 +       // "The bitstream shall not contain data that result in values
1551 +       //  of DiffPicOrderCnt( picA, picB ) used in the decoding
1552 +       //  process that are not in the range of -2^15 to 2^15 - 1,
1553 +       //  inclusive."
1554 +       for (ref = idx; ref != end; ++ref) {
1555 +               if ((cur_poc - dpb[*ref].pic_order_cnt[0]) & 0x8000)
1556 +                       return 0;
1557 +       }
1558 +       return 1;
1559 +}
1560 +
1561 +static void pre_slice_decode(struct rpivid_dec_env *const de,
1562 +                            const struct rpivid_dec_state *const s)
1563 +{
1564 +       const struct v4l2_ctrl_hevc_slice_params *const sh = s->sh;
1565 +       int weighted_pred_flag, idx;
1566 +       u16 cmd_slice;
1567 +       unsigned int collocated_from_l0_flag;
1568 +
1569 +       de->num_slice_msgs = 0;
1570 +
1571 +       cmd_slice = 0;
1572 +       if (sh->slice_type == HEVC_SLICE_I)
1573 +               cmd_slice = 1;
1574 +       if (sh->slice_type == HEVC_SLICE_P)
1575 +               cmd_slice = 2;
1576 +       if (sh->slice_type == HEVC_SLICE_B)
1577 +               cmd_slice = 3;
1578 +
1579 +       cmd_slice |= (s->nb_refs[L0] << 2) | (s->nb_refs[L1] << 6) |
1580 +                    (s->max_num_merge_cand << 11);
1581 +
1582 +       collocated_from_l0_flag =
1583 +               !s->slice_temporal_mvp ||
1584 +               sh->slice_type != HEVC_SLICE_B ||
1585 +               (sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0);
1586 +       cmd_slice |= collocated_from_l0_flag << 14;
1587 +
1588 +       if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
1589 +               // Flag to say all reference pictures are from the past
1590 +               const int no_backward_pred_flag =
1591 +                       has_backward(sh->dpb, sh->ref_idx_l0, s->nb_refs[L0],
1592 +                                    sh->slice_pic_order_cnt) &&
1593 +                       has_backward(sh->dpb, sh->ref_idx_l1, s->nb_refs[L1],
1594 +                                    sh->slice_pic_order_cnt);
1595 +               cmd_slice |= no_backward_pred_flag << 10;
1596 +               msg_slice(de, cmd_slice);
1597 +
1598 +               if (s->slice_temporal_mvp) {
1599 +                       const __u8 *const rpl = collocated_from_l0_flag ?
1600 +                                               sh->ref_idx_l0 : sh->ref_idx_l1;
1601 +                       de->dpbno_col = rpl[sh->collocated_ref_idx];
1602 +                       //v4l2_info(&de->ctx->dev->v4l2_dev,
1603 +                       //          "L0=%d col_ref_idx=%d,
1604 +                       //          dpb_no=%d\n", collocated_from_l0_flag,
1605 +                       //          sh->collocated_ref_idx, de->dpbno_col);
1606 +               }
1607 +
1608 +               // Write reference picture descriptions
1609 +               weighted_pred_flag =
1610 +                       sh->slice_type == HEVC_SLICE_P ?
1611 +                               !!(s->pps.flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED) :
1612 +                               !!(s->pps.flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED);
1613 +
1614 +               for (idx = 0; idx < s->nb_refs[L0]; ++idx) {
1615 +                       unsigned int dpb_no = sh->ref_idx_l0[idx];
1616 +                       //v4l2_info(&de->ctx->dev->v4l2_dev,
1617 +                       //        "L0[%d]=dpb[%d]\n", idx, dpb_no);
1618 +
1619 +                       msg_slice(de,
1620 +                                 dpb_no |
1621 +                                 (sh->dpb[dpb_no].rps ==
1622 +                                       V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR ?
1623 +                                                (1 << 4) : 0) |
1624 +                                 (weighted_pred_flag ? (3 << 5) : 0));
1625 +                       msg_slice(de, sh->dpb[dpb_no].pic_order_cnt[0]);
1626 +
1627 +                       if (weighted_pred_flag) {
1628 +                               const struct v4l2_hevc_pred_weight_table
1629 +                                       *const w = &sh->pred_weight_table;
1630 +                               const int luma_weight_denom =
1631 +                                       (1 << w->luma_log2_weight_denom);
1632 +                               const unsigned int chroma_log2_weight_denom =
1633 +                                       (w->luma_log2_weight_denom +
1634 +                                        w->delta_chroma_log2_weight_denom);
1635 +                               const int chroma_weight_denom =
1636 +                                       (1 << chroma_log2_weight_denom);
1637 +
1638 +                               msg_slice(de,
1639 +                                         w->luma_log2_weight_denom |
1640 +                                         (((w->delta_luma_weight_l0[idx] +
1641 +                                            luma_weight_denom) & 0x1ff)
1642 +                                                << 3));
1643 +                               msg_slice(de, w->luma_offset_l0[idx] & 0xff);
1644 +                               msg_slice(de,
1645 +                                         chroma_log2_weight_denom |
1646 +                                         (((w->delta_chroma_weight_l0[idx][0] +
1647 +                                            chroma_weight_denom) & 0x1ff)
1648 +                                                  << 3));
1649 +                               msg_slice(de,
1650 +                                         w->chroma_offset_l0[idx][0] & 0xff);
1651 +                               msg_slice(de,
1652 +                                         chroma_log2_weight_denom |
1653 +                                         (((w->delta_chroma_weight_l0[idx][1] +
1654 +                                            chroma_weight_denom) & 0x1ff)
1655 +                                                  << 3));
1656 +                               msg_slice(de,
1657 +                                         w->chroma_offset_l0[idx][1] & 0xff);
1658 +                       }
1659 +               }
1660 +
1661 +               for (idx = 0; idx < s->nb_refs[L1]; ++idx) {
1662 +                       unsigned int dpb_no = sh->ref_idx_l1[idx];
1663 +                       //v4l2_info(&de->ctx->dev->v4l2_dev,
1664 +                       //          "L1[%d]=dpb[%d]\n", idx, dpb_no);
1665 +                       msg_slice(de,
1666 +                                 dpb_no |
1667 +                                 (sh->dpb[dpb_no].rps ==
1668 +                                        V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR ?
1669 +                                                (1 << 4) : 0) |
1670 +                                       (weighted_pred_flag ? (3 << 5) : 0));
1671 +                       msg_slice(de, sh->dpb[dpb_no].pic_order_cnt[0]);
1672 +                       if (weighted_pred_flag) {
1673 +                               const struct v4l2_hevc_pred_weight_table
1674 +                                       *const w = &sh->pred_weight_table;
1675 +                               const int luma_weight_denom =
1676 +                                       (1 << w->luma_log2_weight_denom);
1677 +                               const unsigned int chroma_log2_weight_denom =
1678 +                                       (w->luma_log2_weight_denom +
1679 +                                        w->delta_chroma_log2_weight_denom);
1680 +                               const int chroma_weight_denom =
1681 +                                       (1 << chroma_log2_weight_denom);
1682 +
1683 +                               msg_slice(de,
1684 +                                         w->luma_log2_weight_denom |
1685 +                                         (((w->delta_luma_weight_l1[idx] +
1686 +                                            luma_weight_denom) & 0x1ff) << 3));
1687 +                               msg_slice(de, w->luma_offset_l1[idx] & 0xff);
1688 +                               msg_slice(de,
1689 +                                         chroma_log2_weight_denom |
1690 +                                         (((w->delta_chroma_weight_l1[idx][0] +
1691 +                                            chroma_weight_denom) & 0x1ff)
1692 +                                                       << 3));
1693 +                               msg_slice(de,
1694 +                                         w->chroma_offset_l1[idx][0] & 0xff);
1695 +                               msg_slice(de,
1696 +                                         chroma_log2_weight_denom |
1697 +                                         (((w->delta_chroma_weight_l1[idx][1] +
1698 +                                            chroma_weight_denom) & 0x1ff)
1699 +                                                  << 3));
1700 +                               msg_slice(de,
1701 +                                         w->chroma_offset_l1[idx][1] & 0xff);
1702 +                       }
1703 +               }
1704 +       } else {
1705 +               msg_slice(de, cmd_slice);
1706 +       }
1707 +
1708 +       msg_slice(de,
1709 +                 (sh->slice_beta_offset_div2 & 15) |
1710 +                 ((sh->slice_tc_offset_div2 & 15) << 4) |
1711 +                 ((sh->flags &
1712 +                   V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED) ?
1713 +                                               1 << 8 : 0) |
1714 +                 ((sh->flags &
1715 +                         V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED) ?
1716 +                                               1 << 9 : 0) |
1717 +                 ((s->pps.flags &
1718 +                         V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED) ?
1719 +                                               1 << 10 : 0));
1720 +
1721 +       msg_slice(de, ((sh->slice_cr_qp_offset & 31) << 5) +
1722 +                      (sh->slice_cb_qp_offset & 31)); // CMD_QPOFF
1723 +}
1724 +
1725 +//////////////////////////////////////////////////////////////////////////////
1726 +// Write STATUS register with expected end CTU address of previous slice
1727 +
1728 +static void end_previous_slice(struct rpivid_dec_env *const de,
1729 +                              const struct rpivid_dec_state *const s,
1730 +                              const int ctb_addr_ts)
1731 +{
1732 +       // Raster-scan address of the CTB just before ctb_addr_ts in tile scan
1733 +       const int prev_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
1734 +       const int prev_col = prev_rs % de->pic_width_in_ctbs_y;
1735 +       const int prev_row = prev_rs / de->pic_width_in_ctbs_y;
1736 +
1737 +       p1_apb_write(de, RPI_STATUS, 1 + (prev_col << 5) + (prev_row << 18));
1738 +}
1739 +
1740 +static void wpp_pause(struct rpivid_dec_env *const de, int ctb_row)
1741 +{      // Queues STATUS, TRANSFER(PROB_BACKUP), MODE and CONTROL for ctb_row
1742 +       p1_apb_write(de, RPI_STATUS, (ctb_row << 18) + 0x25);
1743 +       p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
1744 +       p1_apb_write(de, RPI_MODE, // 0x70000 only on the bottom CTB row
1745 +                    ctb_row == de->pic_height_in_ctbs_y - 1 ?
1746 +                                                       0x70000 : 0x30000);
1747 +       p1_apb_write(de, RPI_CONTROL, (ctb_row << 16) + 2);
1748 +}
1749 +
1750 +static void wpp_end_previous_slice(struct rpivid_dec_env *const de,
1751 +                                  const struct rpivid_dec_state *const s,
1752 +                                  int ctb_addr_ts)
1753 +{
1754 +       int new_x = s->sh->slice_segment_addr % de->pic_width_in_ctbs_y;
1755 +       int new_y = s->sh->slice_segment_addr / de->pic_width_in_ctbs_y;
1756 +       int last_x =
1757 +               s->ctb_addr_ts_to_rs[ctb_addr_ts - 1] % de->pic_width_in_ctbs_y;
1758 +       int last_y =
1759 +               s->ctb_addr_ts_to_rs[ctb_addr_ts - 1] / de->pic_width_in_ctbs_y;
1760 +       // new_*: CTB where the new segment starts; last_*: CTB just before it
1761 +       if (de->wpp_entry_x < 2 && (de->wpp_entry_y < new_y || new_x > 2) &&
1762 +           de->pic_width_in_ctbs_y > 2)
1763 +               wpp_pause(de, last_y);
1764 +       p1_apb_write(de, RPI_STATUS, 1 + (last_x << 5) + (last_y << 18));
1765 +       if (new_x == 2 || (de->pic_width_in_ctbs_y == 2 &&
1766 +                          de->wpp_entry_y < new_y))
1767 +               p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
1768 +}
1769 +
1770 +//////////////////////////////////////////////////////////////////////////////
1771 +// Wavefront mode: queue the register writes for one entry point, i.e. the
1772 +// CTB run from ctb_addr_ts to the last column (endx) of that same CTB row
1773 +static void wpp_entry_point(struct rpivid_dec_env *const de,
1774 +                           const struct rpivid_dec_state *const s,
1775 +                           const int do_bte,
1776 +                           const int reset_qp_y, const int ctb_addr_ts)
1777 +{
1778 +       int ctb_size = 1 << s->log2_ctb_size;
1779 +       int ctb_addr_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts];
1780 +       // The entry position is also recorded in de->wpp_entry_x/y
1781 +       int ctb_col = de->wpp_entry_x = ctb_addr_rs % de->pic_width_in_ctbs_y;
1782 +       int ctb_row = de->wpp_entry_y = ctb_addr_rs / de->pic_width_in_ctbs_y;
1783 +
1784 +       int endx = de->pic_width_in_ctbs_y - 1;
1785 +       int endy = ctb_row;
1786 +
1787 +       u8 slice_w = ctb_to_slice_w_h(ctb_col, ctb_size,
1788 +                                     s->sps.pic_width_in_luma_samples,
1789 +                                     s->col_bd, s->num_tile_columns);
1790 +       u8 slice_h = ctb_to_slice_w_h(ctb_row, ctb_size,
1791 +                                     s->sps.pic_height_in_luma_samples,
1792 +                                     s->row_bd, s->num_tile_rows);
1793 +       // TILESTART is always written as 0; the end is (endx, endy)
1794 +       p1_apb_write(de, RPI_TILESTART, 0);
1795 +       p1_apb_write(de, RPI_TILEEND, endx + (endy << 16));
1796 +
1797 +       if (do_bte)
1798 +               p1_apb_write(de, RPI_BEGINTILEEND, endx + (endy << 16));
1799 +       // slice_h is only passed on the bottom CTB row, ctb_size otherwise
1800 +       write_slice(de, s, slice_w,
1801 +                   ctb_row == de->pic_height_in_ctbs_y - 1 ?
1802 +                                                       slice_h : ctb_size);
1803 +
1804 +       if (reset_qp_y) {
1805 +               unsigned int sps_qp_bd_offset =
1806 +                       6 * s->sps.bit_depth_luma_minus8;
1807 +
1808 +               p1_apb_write(de, RPI_QP, sps_qp_bd_offset + s->slice_qp);
1809 +       }
1810 +       // MODE is 0x60001 on the bottom CTB row and 0x20001 elsewhere
1811 +       p1_apb_write(de, RPI_MODE,
1812 +                    ctb_row == de->pic_height_in_ctbs_y - 1 ?
1813 +                                                       0x60001 : 0x20001);
1814 +       p1_apb_write(de, RPI_CONTROL, (ctb_col << 0) + (ctb_row << 16));
1815 +}
1816 +
1817 +//////////////////////////////////////////////////////////////////////////////
1818 +// Wavefront mode: queue one slice segment, then one further entry point per
1819 +// entry-point offset. NOTE(review): the sh argument is unused; s->sh is read
1820 +static void wpp_decode_slice(struct rpivid_dec_env *const de,
1821 +                            const struct rpivid_dec_state *const s,
1822 +                            const struct v4l2_ctrl_hevc_slice_params *sh,
1823 +                            int ctb_addr_ts)
1824 +{
1825 +       int i, reset_qp_y = 1;
1826 +       int indep = !s->dependent_slice_segment_flag;
1827 +       int ctb_col = s->sh->slice_segment_addr % de->pic_width_in_ctbs_y;
1828 +       // A previous slice only exists when we are not at tile-scan address 0
1829 +       if (ctb_addr_ts)
1830 +               wpp_end_previous_slice(de, s, ctb_addr_ts);
1831 +       pre_slice_decode(de, s);
1832 +       write_bitstream(de, s);
1833 +       if (ctb_addr_ts == 0 || indep || de->pic_width_in_ctbs_y == 1)
1834 +               write_prob(de, s);
1835 +       else if (ctb_col == 0)
1836 +               p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD);
1837 +       else
1838 +               reset_qp_y = 0; // dependent segment starting mid-row
1839 +       program_slicecmds(de, s->slice_idx);
1840 +       new_slice_segment(de, s);
1841 +       wpp_entry_point(de, s, indep, reset_qp_y, ctb_addr_ts);
1842 +       // Each further entry point starts column_width[0] CTBs later
1843 +       for (i = 0; i < s->sh->num_entry_point_offsets; i++) {
1844 +               int ctb_addr_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts];
1845 +               int ctb_row = ctb_addr_rs / de->pic_width_in_ctbs_y;
1846 +               int last_x = de->pic_width_in_ctbs_y - 1;
1847 +
1848 +               if (de->pic_width_in_ctbs_y > 2)
1849 +                       wpp_pause(de, ctb_row);
1850 +               p1_apb_write(de, RPI_STATUS,
1851 +                            (ctb_row << 18) + (last_x << 5) + 2);
1852 +               if (de->pic_width_in_ctbs_y == 2)
1853 +                       p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
1854 +               if (de->pic_width_in_ctbs_y == 1)
1855 +                       write_prob(de, s);
1856 +               else
1857 +                       p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD);
1858 +               ctb_addr_ts += s->column_width[0];
1859 +               wpp_entry_point(de, s, 0, 1, ctb_addr_ts);
1860 +       }
1861 +}
1862 +
1863 +//////////////////////////////////////////////////////////////////////////////
1864 +// Tiles mode
1865 +
1866 +static void decode_slice(struct rpivid_dec_env *const de,
1867 +                        const struct rpivid_dec_state *const s,
1868 +                        const struct v4l2_ctrl_hevc_slice_params *const sh,
1869 +                        int ctb_addr_ts)
1870 +{
1871 +       int i, reset_qp_y;
1872 +
1873 +       if (ctb_addr_ts)
1874 +               end_previous_slice(de, s, ctb_addr_ts);
1875 +
1876 +       pre_slice_decode(de, s);
1877 +       write_bitstream(de, s);
1878 +
1879 +#if DEBUG_TRACE_P1_CMD
1880 +       if (p1_z < 256) {
1881 +               v4l2_info(&de->ctx->dev->v4l2_dev,
1882 +                         "TS=%d, tile=%d/%d, dss=%d, flags=%#llx\n",
1883 +                         ctb_addr_ts, s->tile_id[ctb_addr_ts],
1884 +                         s->tile_id[ctb_addr_ts - 1],
1885 +                         s->dependent_slice_segment_flag, sh->flags);
1886 +       }
1887 +#endif
1888 +
1889 +       reset_qp_y = ctb_addr_ts == 0 ||
1890 +                  s->tile_id[ctb_addr_ts] != s->tile_id[ctb_addr_ts - 1] ||
1891 +                  !s->dependent_slice_segment_flag;
1892 +       if (reset_qp_y)
1893 +               write_prob(de, s);
1894 +
1895 +       program_slicecmds(de, s->slice_idx);
1896 +       new_slice_segment(de, s);
1897 +       new_entry_point(de, s, !s->dependent_slice_segment_flag, reset_qp_y,
1898 +                       ctb_addr_ts);
1899 +
1900 +       for (i = 0; i < s->sh->num_entry_point_offsets; i++) {
1901 +               int ctb_addr_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts];
1902 +               int ctb_col = ctb_addr_rs % de->pic_width_in_ctbs_y;
1903 +               int ctb_row = ctb_addr_rs / de->pic_width_in_ctbs_y;
1904 +               int tile_x = ctb_to_tile(ctb_col, s->col_bd,
1905 +                                        s->num_tile_columns - 1);
1906 +               int tile_y =
1907 +                       ctb_to_tile(ctb_row, s->row_bd, s->num_tile_rows - 1);
1908 +               int last_x = s->col_bd[tile_x + 1] - 1;
1909 +               int last_y = s->row_bd[tile_y + 1] - 1;
1910 +
1911 +               p1_apb_write(de, RPI_STATUS,
1912 +                            2 + (last_x << 5) + (last_y << 18));
1913 +               write_prob(de, s);
1914 +               ctb_addr_ts += s->column_width[tile_x] * s->row_height[tile_y];
1915 +               new_entry_point(de, s, 0, 1, ctb_addr_ts);
1916 +       }
1917 +}
1918 +
1919 +//////////////////////////////////////////////////////////////////////////////
1920 +// Scaling factors
1921 +
1922 +static void expand_scaling_list(const unsigned int size_id,
1923 +                               const unsigned int matrix_id, u8 *const dst0,
1924 +                               const u8 *const src0, uint8_t dc)
1925 +{
1926 +       // Expand one scaling list from src0 into dst0.
1927 +       //
1928 +       //   size_id   - 0: 16 entries copied as-is; 1: 64 entries copied as-is;
1929 +       //               2: 8x8 source replicated 2x per axis (16x16 out);
1930 +       //               other: 8x8 source replicated 4x per axis (32x32 out)
1931 +       //   matrix_id - FIXME: matrix_id is unused ?
1932 +       //   dst0      - receives 16, 64, 256 or 1024 bytes depending on size_id
1933 +       //   src0      - 16 bytes are read for size_id 0, 64 bytes otherwise
1934 +       //   dc        - replaces the first output entry when size_id >= 2
1935 +       const unsigned int rep = (size_id == 2) ? 2 : 4;
1936 +       unsigned int row, col, k;
1937 +       u8 *out = dst0;
1938 +
1939 +       switch (size_id) {
1940 +       case 0:
1941 +               memcpy(dst0, src0, 16);
1942 +               return;
1943 +       case 1:
1944 +               memcpy(dst0, src0, 64);
1945 +               return;
1946 +       default:
1947 +               // Up-sampled sizes are produced by the loops below
1948 +               break;
1949 +       }
1950 +
1951 +       for (row = 0; row != 8 * rep; row++) {
1952 +               // Source row feeding this output row
1953 +               const u8 *in = src0 + (row / rep) * 8;
1954 +
1955 +               for (col = 0; col != 8; ++col) {
1956 +                       // Emit the coefficient rep times side by side
1957 +                       for (k = 0; k != rep; ++k)
1958 +                               *out++ = *in;
1959 +                       ++in;
1960 +               }
1961 +       }
1962 +
1963 +       // The DC value takes the place of the first
1964 +       // entry of the expanded list
1965 +       dst0[0] = dc;
1966 +}
1967 +
1968 +static void populate_scaling_factors(const struct rpivid_run *const run,
1969 +                                    struct rpivid_dec_env *const de,
1970 +                                    const struct rpivid_dec_state *const s)
1971 +{      // Expands all of run->h265.scaling_matrix into de->scaling_factors
1972 +       const struct v4l2_ctrl_hevc_scaling_matrix *const sl =
1973 +               run->h265.scaling_matrix;
1974 +       // Offsets into de->scaling_factors, indexed [size_id][matrix_id]
1975 +       static const u32 scaling_factor_offsets[4][6] = {
1976 +               // MID0    MID1    MID2    MID3    MID4    MID5
1977 +               // SID0 (4x4)
1978 +               { 0x0000, 0x0010, 0x0020, 0x0030, 0x0040, 0x0050 },
1979 +               // SID1 (8x8)
1980 +               { 0x0060, 0x00A0, 0x00E0, 0x0120, 0x0160, 0x01A0 },
1981 +               // SID2 (16x16)
1982 +               { 0x01E0, 0x02E0, 0x03E0, 0x04E0, 0x05E0, 0x06E0 },
1983 +               // SID3 (32x32)
1984 +               { 0x07E0, 0x0BE0, 0x0000, 0x0000, 0x0000, 0x0000 }
1985 +       };
1986 +       // NOTE(review): the s argument is not referenced in this function
1987 +       unsigned int mid;
1988 +       // Six lists each for 4x4, 8x8 and 16x16; only two for 32x32
1989 +       for (mid = 0; mid < 6; mid++)
1990 +               expand_scaling_list(0, mid,
1991 +                                   de->scaling_factors +
1992 +                                           scaling_factor_offsets[0][mid],
1993 +                                   sl->scaling_list_4x4[mid], 0);
1994 +       for (mid = 0; mid < 6; mid++)
1995 +               expand_scaling_list(1, mid,
1996 +                                   de->scaling_factors +
1997 +                                           scaling_factor_offsets[1][mid],
1998 +                                   sl->scaling_list_8x8[mid], 0);
1999 +       for (mid = 0; mid < 6; mid++)
2000 +               expand_scaling_list(2, mid,
2001 +                                   de->scaling_factors +
2002 +                                           scaling_factor_offsets[2][mid],
2003 +                                   sl->scaling_list_16x16[mid],
2004 +                                   sl->scaling_list_dc_coef_16x16[mid]);
2005 +       for (mid = 0; mid < 2; mid += 1)
2006 +               expand_scaling_list(3, mid,
2007 +                                   de->scaling_factors +
2008 +                                           scaling_factor_offsets[3][mid],
2009 +                                   sl->scaling_list_32x32[mid],
2010 +                                   sl->scaling_list_dc_coef_32x32[mid]);
2011 +}
2012 +
2013 +static void free_ps_info(struct rpivid_dec_state *const s)
2014 +{
2015 +       // Release all five lookup tables, then leave NULLs so a repeat is safe
2016 +       kfree(s->row_bd);
2017 +       kfree(s->col_bd);
2018 +       kfree(s->tile_id);
2019 +       kfree(s->ctb_addr_ts_to_rs);
2020 +       kfree(s->ctb_addr_rs_to_ts);
2021 +       s->row_bd = NULL;
2022 +       s->col_bd = NULL;
2023 +       s->tile_id = NULL;
2024 +       s->ctb_addr_ts_to_rs = NULL;
2025 +       s->ctb_addr_rs_to_ts = NULL;
2026 +}
2027 +
2028 +static int updated_ps(struct rpivid_dec_state *const s)
2029 +{
2030 +       unsigned int ctb_addr_rs, i;
2031 +       int j, x, y, tile_id;
2032 +
2033 +       // Rebuild SPS/PPS-derived tables; 0 on success, -ENOMEM if alloc fails
2034 +       free_ps_info(s);
2035 +
2036 +       // Inferred parameters
2037 +       s->log2_ctb_size = s->sps.log2_min_luma_coding_block_size_minus3 + 3 +
2038 +                          s->sps.log2_diff_max_min_luma_coding_block_size;
2039 +
2040 +       s->ctb_width = (s->sps.pic_width_in_luma_samples +
2041 +                       (1 << s->log2_ctb_size) - 1) >>
2042 +                      s->log2_ctb_size;
2043 +       s->ctb_height = (s->sps.pic_height_in_luma_samples +
2044 +                        (1 << s->log2_ctb_size) - 1) >>
2045 +                       s->log2_ctb_size;
2046 +       s->ctb_size = s->ctb_width * s->ctb_height;
2047 +
2048 +       // Tile layout: a single picture-sized tile unless the PPS enables tiles
2049 +       if (!(s->pps.flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED)) {
2050 +               s->num_tile_columns = 1;
2051 +               s->num_tile_rows = 1;
2052 +               s->column_width[0] = s->ctb_width;
2053 +               s->row_height[0] = s->ctb_height;
2054 +       } else {
2055 +               s->num_tile_columns = s->pps.num_tile_columns_minus1 + 1;
2056 +               s->num_tile_rows = s->pps.num_tile_rows_minus1 + 1;
2057 +               for (i = 0; i < s->num_tile_columns; ++i)
2058 +                       s->column_width[i] = s->pps.column_width_minus1[i] + 1;
2059 +               for (i = 0; i < s->num_tile_rows; ++i)
2060 +                       s->row_height[i] = s->pps.row_height_minus1[i] + 1;
2061 +       }
2062 +       // Tile boundaries: col_bd[i] / row_bd[i] = first CTB column / row of tile i
2063 +       s->col_bd = kmalloc_array(s->num_tile_columns + 1,
2064 +                                 sizeof(*s->col_bd), GFP_KERNEL);
2065 +       s->row_bd = kmalloc_array(s->num_tile_rows + 1,
2066 +                                 sizeof(*s->row_bd), GFP_KERNEL);
2067 +       if (!s->col_bd || !s->row_bd)
2068 +               goto fail;
2069 +
2070 +       s->col_bd[0] = 0;
2071 +       for (i = 0; i < s->num_tile_columns; i++)
2072 +               s->col_bd[i + 1] = s->col_bd[i] + s->column_width[i];
2073 +
2074 +       s->row_bd[0] = 0;
2075 +       for (i = 0; i < s->num_tile_rows; i++)
2076 +               s->row_bd[i + 1] = s->row_bd[i] + s->row_height[i];
2077 +       // Raster <-> tile scan address maps and tile ids (indexed by tile scan)
2078 +       s->ctb_addr_rs_to_ts = kmalloc_array(s->ctb_size,
2079 +                               sizeof(*s->ctb_addr_rs_to_ts), GFP_KERNEL);
2080 +       s->ctb_addr_ts_to_rs = kmalloc_array(s->ctb_size,
2081 +                               sizeof(*s->ctb_addr_ts_to_rs), GFP_KERNEL);
2082 +       s->tile_id = kmalloc_array(s->ctb_size, sizeof(*s->tile_id),
2083 +                                  GFP_KERNEL);
2084 +       if (!s->ctb_addr_rs_to_ts || !s->ctb_addr_ts_to_rs || !s->tile_id)
2085 +               goto fail;
2086 +
2087 +       for (ctb_addr_rs = 0; ctb_addr_rs < s->ctb_size; ctb_addr_rs++) {
2088 +               int tb_x = ctb_addr_rs % s->ctb_width;
2089 +               int tb_y = ctb_addr_rs / s->ctb_width;
2090 +               int tile_x = 0, tile_y = 0, val = 0;
2091 +
2092 +               for (i = 0; i < s->num_tile_columns; i++) {
2093 +                       if (tb_x < s->col_bd[i + 1]) {
2094 +                               tile_x = i;
2095 +                               break;
2096 +                       }
2097 +               }
2098 +
2099 +               for (i = 0; i < s->num_tile_rows; i++) {
2100 +                       if (tb_y < s->row_bd[i + 1]) {
2101 +                               tile_y = i;
2102 +                               break;
2103 +                       }
2104 +               }
2105 +               // val: CTBs in all earlier tiles, plus the offset inside this one
2106 +               for (i = 0; i < tile_x; i++)
2107 +                       val += s->row_height[tile_y] * s->column_width[i];
2108 +               for (i = 0; i < tile_y; i++)
2109 +                       val += s->ctb_width * s->row_height[i];
2110 +               val += (tb_y - s->row_bd[tile_y]) * s->column_width[tile_x] +
2111 +                      tb_x - s->col_bd[tile_x];
2112 +               s->ctb_addr_rs_to_ts[ctb_addr_rs] = val;
2113 +               s->ctb_addr_ts_to_rs[val] = ctb_addr_rs;
2114 +       }
2115 +
2116 +       for (j = 0, tile_id = 0; j < s->num_tile_rows; j++)
2117 +               for (i = 0; i < s->num_tile_columns; i++, tile_id++)
2118 +                       for (y = s->row_bd[j]; y < s->row_bd[j + 1]; y++)
2119 +                               for (x = s->col_bd[i];
2120 +                                    x < s->col_bd[i + 1]; x++)
2121 +                                       s->tile_id[s->ctb_addr_rs_to_ts
2122 +                                                          [y * s->ctb_width +
2123 +                                                           x]] = tile_id;
2124 +
2125 +       return 0;
2126 +
2127 +fail:
2128 +       free_ps_info(s);
2129 +       return -ENOMEM;
2130 +}
2131 +
2132 +static int frame_end(struct rpivid_dev *const dev,
2133 +                    struct rpivid_dec_env *const de,
2134 +                    const struct rpivid_dec_state *const s)
2135 +{      // Returns 0, or -ENOMEM if the command copy buffer cannot be grown
2136 +       const unsigned int last_x = s->col_bd[s->num_tile_columns] - 1;
2137 +       const unsigned int last_y = s->row_bd[s->num_tile_rows] - 1;
2138 +       size_t cmd_size;
2139 +       // last_x/last_y: final CTB column/row (sum of tile widths/heights - 1)
2140 +       if (s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED) {
2141 +               if (de->wpp_entry_x < 2 && de->pic_width_in_ctbs_y > 2)
2142 +                       wpp_pause(de, last_y);
2143 +       }
2144 +       p1_apb_write(de, RPI_STATUS, 1 + (last_x << 5) + (last_y << 18));
2145 +
2146 +       // Copy commands out to dma buf
2147 +       cmd_size = de->cmd_len * sizeof(de->cmd_fifo[0]);
2148 +       // (Re)allocate the copy buffer when it is missing or too small
2149 +       if (!de->cmd_copy_gptr->ptr || cmd_size > de->cmd_copy_gptr->size) {
2150 +               size_t cmd_alloc = round_up_size(cmd_size);
2151 +
2152 +               if (gptr_realloc_new(dev, de->cmd_copy_gptr, cmd_alloc)) {
2153 +                       v4l2_err(&dev->v4l2_dev,
2154 +                                "Alloc cmd buffer (%zu): FAILED\n", cmd_alloc);
2155 +                       return -ENOMEM;
2156 +               }
2157 +               v4l2_info(&dev->v4l2_dev, "Alloc cmd buffer (%zu): OK\n",
2158 +                         cmd_alloc);
2159 +       }
2160 +
2161 +       memcpy(de->cmd_copy_gptr->ptr, de->cmd_fifo, cmd_size);
2162 +       return 0;
2163 +}
2164 +
2165 +static void setup_colmv(struct rpivid_ctx *const ctx, struct rpivid_run *run,
2166 +                       struct rpivid_dec_state *const s)
2167 +{      // Sizes ctx->colmv_* from the SPS picture size; 'run' is unused
2168 +       ctx->colmv_stride = ALIGN(s->sps.pic_width_in_luma_samples, 64);
2169 +       ctx->colmv_picsize = ctx->colmv_stride * // x (64-aligned height / 16)
2170 +               (ALIGN(s->sps.pic_height_in_luma_samples, 64) >> 4);
2171 +}
2172 +
2173 +// Take a decode env off the free list (NULL if empty). Safe from irq context
2174 +static struct rpivid_dec_env *dec_env_new(struct rpivid_ctx *const ctx)
2175 +{
2176 +       struct rpivid_dec_env *env;
2177 +       unsigned long irq_state;
2178 +
2179 +       spin_lock_irqsave(&ctx->dec_lock, irq_state);
2180 +       // Pop the list head, if any, and mark it as starting a new slice
2181 +       env = ctx->dec_free;
2182 +       if (!env)
2183 +               goto unlock;
2184 +       ctx->dec_free = env->next;
2185 +       env->next = NULL;
2186 +       env->state = RPIVID_DECODE_SLICE_START;
2187 +unlock:
2188 +       spin_unlock_irqrestore(&ctx->dec_lock, irq_state);
2189 +       return env;
2190 +}
2191 +
2192 +// Hand a decode env back to its context. Can be called from irq context
2193 +static void dec_env_delete(struct rpivid_dec_env *const de)
2194 +{
2195 +       struct rpivid_ctx * const ctx = de->ctx;
2196 +       unsigned long lock_flags;
2197 +       // Both aux entries are released before dec_lock is taken
2198 +       aux_q_release(ctx, &de->frame_aux);
2199 +       aux_q_release(ctx, &de->col_aux);
2200 +
2201 +       spin_lock_irqsave(&ctx->dec_lock, lock_flags);
2202 +       // Mark the env as ended and push it on the head of the free list
2203 +       de->state = RPIVID_DECODE_END;
2204 +       de->next = ctx->dec_free;
2205 +       ctx->dec_free = de;
2206 +
2207 +       spin_unlock_irqrestore(&ctx->dec_lock, lock_flags);
2208 +}
2209 +
2210 +static void dec_env_uninit(struct rpivid_ctx *const ctx)
2211 +{
2212 +       struct rpivid_dec_env *const pool = ctx->dec_pool;
2213 +       unsigned int n;
2214 +
2215 +       // Each env owns a command FIFO: free those first, then the pool
2216 +       // array itself. Nothing to free if the pool was never allocated.
2217 +       if (pool) {
2218 +               for (n = 0; n < RPIVID_DEC_ENV_COUNT; n++)
2219 +                       kfree(pool[n].cmd_fifo);
2220 +               kfree(pool);
2221 +       }
2222 +
2223 +       // Clear the pool and free-list pointers in every case
2224 +       ctx->dec_free = NULL;
2225 +       ctx->dec_pool = NULL;
2226 +}
2227 +
2228 +static int dec_env_init(struct rpivid_ctx *const ctx)
2229 +{
2230 +       unsigned int i;
2231 +       // Allocate the zeroed env pool; returns 0 on success or -ENOMEM
2232 +       ctx->dec_pool = kcalloc(RPIVID_DEC_ENV_COUNT, sizeof(*ctx->dec_pool),
2233 +                               GFP_KERNEL);
2234 +       if (!ctx->dec_pool)
2235 +               return -ENOMEM;
2236 +
2237 +       spin_lock_init(&ctx->dec_lock);
2238 +
2239 +       // Build free chain (zeroing leaves the last env's next as NULL)
2240 +       ctx->dec_free = ctx->dec_pool;
2241 +       for (i = 0; i != RPIVID_DEC_ENV_COUNT - 1; ++i)
2242 +               ctx->dec_pool[i].next = ctx->dec_pool + i + 1;
2243 +
2244 +       // Give each env its back-pointer, index and command FIFO
2245 +       for (i = 0; i != RPIVID_DEC_ENV_COUNT; ++i) {
2246 +               struct rpivid_dec_env *const de = ctx->dec_pool + i;
2247 +
2248 +               de->ctx = ctx;
2249 +               de->decode_order = i;
2250 +               de->cmd_max = 1024;
2251 +               de->cmd_fifo = kmalloc_array(de->cmd_max,
2252 +                                            sizeof(*de->cmd_fifo), GFP_KERNEL);
2253 +               if (!de->cmd_fifo)
2254 +                       goto fail;
2255 +       }
2256 +
2257 +       return 0;
2258 +
2259 +fail:
2260 +       // FIFOs not yet allocated are still NULL, so freeing them all is safe
2261 +       dec_env_uninit(ctx);
2262 +       return -ENOMEM;
2263 +}
2264 +
2265 +// Assume that we get exactly the same DPB for every slice
2266 +// it makes no real sense otherwise
2267 +#if V4L2_HEVC_DPB_ENTRIES_NUM_MAX > 16 // build fails above 16 DPB entries
2268 +#error HEVC_DPB_ENTRIES > h/w slots
2269 +#endif
2270 +
2271 +static u32 mk_config2(const struct rpivid_dec_state *const s)
2272 +{
2273 +       const struct v4l2_ctrl_hevc_pps *const pps = &s->pps;
2274 +       const struct v4l2_ctrl_hevc_sps *const sps = &s->sps;
2275 +       u32 cfg;
2276 +
2277 +       // BitDepthY from bit 0, BitDepthC from bit 4
2278 +       cfg = (sps->bit_depth_luma_minus8 + 8) << 0;
2279 +       cfg |= (sps->bit_depth_chroma_minus8 + 8) << 4;
2280 +       // Bits 8 / 9 are set when the luma / chroma depth is above 8 bits
2281 +       cfg |= sps->bit_depth_luma_minus8 ? BIT(8) : 0;
2282 +       cfg |= sps->bit_depth_chroma_minus8 ? BIT(9) : 0;
2283 +       // log2 of the CTB size from bit 10
2284 +       cfg |= s->log2_ctb_size << 10;
2285 +       // Intra prediction controls
2286 +       cfg |= (pps->flags & V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED) ?
2287 +               BIT(13) : 0;
2288 +       cfg |= (sps->flags &
2289 +               V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED) ? BIT(14) : 0;
2290 +       // Write motion vectors to external memory
2291 +       cfg |= (sps->flags & V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) ?
2292 +               BIT(15) : 0;
2293 +       cfg |= (pps->log2_parallel_merge_level_minus2 + 2) << 16;
2294 +       cfg |= s->slice_temporal_mvp ? BIT(19) : 0;
2295 +       cfg |= (sps->flags & V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED) ?
2296 +               BIT(20) : 0;
2297 +       // PPS chroma QP offsets, masked to 5 bits, from bits 21 and 26
2298 +       cfg |= (pps->pps_cb_qp_offset & 31) << 21;
2299 +       cfg |= (pps->pps_cr_qp_offset & 31) << 26;
2300 +       return cfg;
2301 +}
2302 +
2303 +static void rpivid_h265_setup(struct rpivid_ctx *ctx, struct rpivid_run *run)
2304 +{
2305 +       struct rpivid_dev *const dev = ctx->dev;
2306 +       const struct v4l2_ctrl_hevc_slice_params *const sh =
2307 +                                               run->h265.slice_params;
2308 +       const struct v4l2_hevc_pred_weight_table *pred_weight_table;
2309 +       struct rpivid_q_aux *dpb_q_aux[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
2310 +       struct rpivid_dec_state *const s = ctx->state;
2311 +       struct vb2_queue *vq;
2312 +       struct rpivid_dec_env *de;
2313 +       int ctb_addr_ts;
2314 +       unsigned int i;
2315 +       int use_aux;
2316 +       bool slice_temporal_mvp;
2317 +
2318 +       pred_weight_table = &sh->pred_weight_table;
2319 +
2320 +       s->frame_end =
2321 +               ((run->src->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF) == 0);
2322 +
2323 +       de = ctx->dec0;
2324 +       slice_temporal_mvp = (sh->flags &
2325 +                  V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED);
2326 +
2327 +       if (de && de->state != RPIVID_DECODE_END) {
2328 +               ++s->slice_idx;
2329 +
2330 +               switch (de->state) {
2331 +               case RPIVID_DECODE_SLICE_CONTINUE:
2332 +                       // Expected state
2333 +                       break;
2334 +               default:
2335 +                       v4l2_err(&dev->v4l2_dev, "%s: Unexpected state: %d\n",
2336 +                                __func__, de->state);
2337 +               /* FALLTHRU */
2338 +               case RPIVID_DECODE_ERROR_CONTINUE:
2339 +                       // Uncleared error - fail now
2340 +                       goto fail;
2341 +               }
2342 +
2343 +               if (s->slice_temporal_mvp != slice_temporal_mvp) {
2344 +                       v4l2_warn(&dev->v4l2_dev,
2345 +                                 "Slice Temporal MVP non-constant\n");
2346 +                       goto fail;
2347 +               }
2348 +       } else {
2349 +               /* Frame start */
2350 +               unsigned int ctb_size_y;
2351 +               bool sps_changed = false;
2352 +
2353 +               if (memcmp(&s->sps, run->h265.sps, sizeof(s->sps)) != 0) {
2354 +                       /* SPS changed */
2355 +                       v4l2_info(&dev->v4l2_dev, "SPS changed\n");
2356 +                       memcpy(&s->sps, run->h265.sps, sizeof(s->sps));
2357 +                       sps_changed = true;
2358 +               }
2359 +               if (sps_changed ||
2360 +                   memcmp(&s->pps, run->h265.pps, sizeof(s->pps)) != 0) {
2361 +                       /* SPS changed */
2362 +                       v4l2_info(&dev->v4l2_dev, "PPS changed\n");
2363 +                       memcpy(&s->pps, run->h265.pps, sizeof(s->pps));
2364 +
2365 +                       /* Recalc stuff as required */
2366 +                       updated_ps(s);
2367 +               }
2368 +
2369 +               de = dec_env_new(ctx);
2370 +               if (!de) {
2371 +                       v4l2_err(&dev->v4l2_dev,
2372 +                                "Failed to find free decode env\n");
2373 +                       goto fail;
2374 +               }
2375 +               ctx->dec0 = de;
2376 +
2377 +               ctb_size_y =
2378 +                       1U << (s->sps.log2_min_luma_coding_block_size_minus3 +
2379 +                              3 +
2380 +                              s->sps.log2_diff_max_min_luma_coding_block_size);
2381 +
2382 +               de->pic_width_in_ctbs_y =
2383 +                       (s->sps.pic_width_in_luma_samples + ctb_size_y - 1) /
2384 +                               ctb_size_y; // 7-15
2385 +               de->pic_height_in_ctbs_y =
2386 +                       (s->sps.pic_height_in_luma_samples + ctb_size_y - 1) /
2387 +                               ctb_size_y; // 7-17
2388 +               de->cmd_len = 0;
2389 +               de->dpbno_col = ~0U;
2390 +
2391 +               de->bit_copy_gptr = ctx->bitbufs + 0;
2392 +               de->bit_copy_len = 0;
2393 +               de->cmd_copy_gptr = ctx->cmdbufs + 0;
2394 +
2395 +               de->frame_c_offset = ctx->dst_fmt.height * 128;
2396 +               de->frame_stride = ctx->dst_fmt.bytesperline * 128;
2397 +               de->frame_addr =
2398 +                       vb2_dma_contig_plane_dma_addr(&run->dst->vb2_buf, 0);
2399 +               de->frame_aux = NULL;
2400 +
2401 +               if (s->sps.bit_depth_luma_minus8 !=
2402 +                   s->sps.bit_depth_chroma_minus8) {
2403 +                       v4l2_warn(&dev->v4l2_dev,
2404 +                                 "Chroma depth (%d) != Luma depth (%d)\n",
2405 +                                 s->sps.bit_depth_chroma_minus8 + 8,
2406 +                                 s->sps.bit_depth_luma_minus8 + 8);
2407 +                       goto fail;
2408 +               }
2409 +               if (s->sps.bit_depth_luma_minus8 == 0) {
2410 +                       if (ctx->dst_fmt.pixelformat !=
2411 +                                               V4L2_PIX_FMT_NV12_COL128) {
2412 +                               v4l2_err(&dev->v4l2_dev,
2413 +                                        "Pixel format %#x != NV12_COL128 for 8-bit output",
2414 +                                        ctx->dst_fmt.pixelformat);
2415 +                               goto fail;
2416 +                       }
2417 +               } else if (s->sps.bit_depth_luma_minus8 == 2) {
2418 +                       if (ctx->dst_fmt.pixelformat !=
2419 +                                               V4L2_PIX_FMT_NV12_10_COL128) {
2420 +                               v4l2_err(&dev->v4l2_dev,
2421 +                                        "Pixel format %#x != NV12_10_COL128 for 10-bit output",
2422 +                                        ctx->dst_fmt.pixelformat);
2423 +                               goto fail;
2424 +                       }
2425 +               } else {
2426 +                       v4l2_warn(&dev->v4l2_dev,
2427 +                                 "Luma depth (%d) unsupported\n",
2428 +                                 s->sps.bit_depth_luma_minus8 + 8);
2429 +                       goto fail;
2430 +               }
2431 +               if (run->dst->vb2_buf.num_planes != 1) {
2432 +                       v4l2_warn(&dev->v4l2_dev, "Capture planes (%d) != 1\n",
2433 +                                 run->dst->vb2_buf.num_planes);
2434 +                       goto fail;
2435 +               }
2436 +               if (run->dst->planes[0].length <
2437 +                   ctx->dst_fmt.sizeimage) {
2438 +                       v4l2_warn(&dev->v4l2_dev,
2439 +                                 "Capture plane[0] length (%d) < sizeimage (%d)\n",
2440 +                                 run->dst->planes[0].length,
2441 +                                 ctx->dst_fmt.sizeimage);
2442 +                       goto fail;
2443 +               }
2444 +
2445 +               if (s->sps.pic_width_in_luma_samples > 4096 ||
2446 +                   s->sps.pic_height_in_luma_samples > 4096) {
2447 +                       v4l2_warn(&dev->v4l2_dev,
2448 +                                 "Pic dimension (%dx%d) exeeds 4096\n",
2449 +                                 s->sps.pic_width_in_luma_samples,
2450 +                                 s->sps.pic_height_in_luma_samples);
2451 +                       goto fail;
2452 +               }
2453 +
2454 +               // Fill in ref planes with our address s.t. if we mess
2455 +               // up refs somehow then we still have a valid address
2456 +               // entry
2457 +               for (i = 0; i != 16; ++i)
2458 +                       de->ref_addrs[i] = de->frame_addr;
2459 +
2460 +               /*
2461 +                * Stash initial temporal_mvp flag
2462 +                * This must be the same for all pic slices (7.4.7.1)
2463 +                */
2464 +               s->slice_temporal_mvp = slice_temporal_mvp;
2465 +
2466 +               // Phase 2 reg pre-calc
2467 +               de->rpi_config2 = mk_config2(s);
2468 +               de->rpi_framesize = (s->sps.pic_height_in_luma_samples << 16) |
2469 +                                   s->sps.pic_width_in_luma_samples;
2470 +               de->rpi_currpoc = sh->slice_pic_order_cnt;
2471 +
2472 +               if (s->sps.flags &
2473 +                   V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) {
2474 +                       setup_colmv(ctx, run, s);
2475 +               }
2476 +
2477 +               s->slice_idx = 0;
2478 +
2479 +               if (sh->slice_segment_addr != 0) {
2480 +                       v4l2_warn(&dev->v4l2_dev,
2481 +                                 "New frame but segment_addr=%d\n",
2482 +                                 sh->slice_segment_addr);
2483 +                       goto fail;
2484 +               }
2485 +
2486 +               /* Allocate a bitbuf if we need one - don't need one if single
2487 +                * slice as we can use the src buf directly
2488 +                */
2489 +               if (!s->frame_end && !de->bit_copy_gptr->ptr) {
2490 +                       const size_t wxh = s->sps.pic_width_in_luma_samples *
2491 +                               s->sps.pic_height_in_luma_samples;
2492 +                       size_t bits_alloc;
2493 +
2494 +                       /* Annex A gives a min compression of 2 @ lvl 3.1
2495 +                        * (wxh <= 983040) and min 4 thereafter but avoid
2496 +                        * the odity of 983041 having a lower limit than
2497 +                        * 983040.
2498 +                        * Multiply by 3/2 for 4:2:0
2499 +                        */
2500 +                       bits_alloc = wxh < 983040 ? wxh * 3 / 4 :
2501 +                               wxh < 983040 * 2 ? 983040 * 3 / 4 :
2502 +                               wxh * 3 / 8;
2503 +                       bits_alloc = round_up_size(bits_alloc);
2504 +
2505 +                       if (gptr_alloc(dev, de->bit_copy_gptr,
2506 +                                      bits_alloc,
2507 +                                      DMA_ATTR_FORCE_CONTIGUOUS) != 0) {
2508 +                               v4l2_err(&dev->v4l2_dev,
2509 +                                        "Unable to alloc buf (%d) for bit copy\n",
2510 +                                        bits_alloc);
2511 +                               goto fail;
2512 +                       }
2513 +                       v4l2_info(&dev->v4l2_dev,
2514 +                                 "Alloc buf (%d) for bit copy OK\n",
2515 +                                 bits_alloc);
2516 +               }
2517 +       }
2518 +
2519 +       // Pre calc a few things
2520 +       s->src_addr =
2521 +               !s->frame_end ?
2522 +                       0 :
2523 +                       vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf, 0);
2524 +       s->src_buf = s->src_addr != 0 ? NULL :
2525 +                                       vb2_plane_vaddr(&run->src->vb2_buf, 0);
2526 +       if (!s->src_addr && !s->src_buf) {
2527 +               v4l2_err(&dev->v4l2_dev, "Failed to map src buffer\n");
2528 +               goto fail;
2529 +       }
2530 +
2531 +       s->sh = sh;
2532 +       s->slice_qp = 26 + s->pps.init_qp_minus26 + s->sh->slice_qp_delta;
2533 +       s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ?
2534 +                                       0 :
2535 +                                       (5 - sh->five_minus_max_num_merge_cand);
2536 +       // * SH DSS flag invented by me - but clearly needed
2537 +       s->dependent_slice_segment_flag =
2538 +               ((sh->flags &
2539 +                 V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT) != 0);
2540 +
2541 +       s->nb_refs[0] = (sh->slice_type == HEVC_SLICE_I) ?
2542 +                               0 :
2543 +                               sh->num_ref_idx_l0_active_minus1 + 1;
2544 +       s->nb_refs[1] = (sh->slice_type != HEVC_SLICE_B) ?
2545 +                               0 :
2546 +                               sh->num_ref_idx_l1_active_minus1 + 1;
2547 +
2548 +       if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED)
2549 +               populate_scaling_factors(run, de, s);
2550 +
2551 +       ctb_addr_ts = s->ctb_addr_rs_to_ts[sh->slice_segment_addr];
2552 +
2553 +       if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED))
2554 +               wpp_decode_slice(de, s, sh, ctb_addr_ts);
2555 +       else
2556 +               decode_slice(de, s, sh, ctb_addr_ts);
2557 +
2558 +       if (!s->frame_end)
2559 +               return;
2560 +
2561 +       // Frame end
2562 +       memset(dpb_q_aux, 0,
2563 +              sizeof(*dpb_q_aux) * V4L2_HEVC_DPB_ENTRIES_NUM_MAX);
2564 +       /*
2565 +        * Need Aux ents for all (ref) DPB ents if temporal MV could
2566 +        * be enabled for any pic
2567 +        * ** At the moment we have aux ents for all pics whether or not
2568 +        *    they are ref
2569 +        */
2570 +       use_aux = ((s->sps.flags &
2571 +                 V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) != 0);
2572 +
2573 +       // Locate ref frames
2574 +       // At least in the current implementation this is constant across all
2575 +       // slices. If this changes we will need idx mapping code.
2576 +       // Uses sh so here rather than trigger
2577 +
2578 +       vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
2579 +
2580 +       if (!vq) {
2581 +               v4l2_err(&dev->v4l2_dev, "VQ gone!\n");
2582 +               goto fail;
2583 +       }
2584 +
2585 +       //        v4l2_info(&dev->v4l2_dev, "rpivid_h265_end of frame\n");
2586 +       if (frame_end(dev, de, s))
2587 +               goto fail;
2588 +
2589 +       for (i = 0; i < sh->num_active_dpb_entries; ++i) {
2590 +               int buffer_index =
2591 +                       vb2_find_timestamp(vq, sh->dpb[i].timestamp, 0);
2592 +               struct vb2_buffer *buf = buffer_index < 0 ?
2593 +                                       NULL :
2594 +                                       vb2_get_buffer(vq, buffer_index);
2595 +
2596 +               if (!buf) {
2597 +                       v4l2_warn(&dev->v4l2_dev,
2598 +                                 "Missing DPB ent %d, timestamp=%lld, index=%d\n",
2599 +                                 i, (long long)sh->dpb[i].timestamp,
2600 +                                 buffer_index);
2601 +                       continue;
2602 +               }
2603 +
2604 +               if (use_aux) {
2605 +                       dpb_q_aux[i] = aux_q_ref(ctx,
2606 +                                                ctx->aux_ents[buffer_index]);
2607 +                       if (!dpb_q_aux[i])
2608 +                               v4l2_warn(&dev->v4l2_dev,
2609 +                                         "Missing DPB AUX ent %d index=%d\n",
2610 +                                         i, buffer_index);
2611 +               }
2612 +
2613 +               de->ref_addrs[i] =
2614 +                       vb2_dma_contig_plane_dma_addr(buf, 0);
2615 +       }
2616 +
2617 +       // Move DPB from temp
2618 +       for (i = 0; i != V4L2_HEVC_DPB_ENTRIES_NUM_MAX; ++i) {
2619 +               aux_q_release(ctx, &s->ref_aux[i]);
2620 +               s->ref_aux[i] = dpb_q_aux[i];
2621 +       }
2622 +       // Unref the old frame aux too - it is either in the DPB or not
2623 +       // now
2624 +       aux_q_release(ctx, &s->frame_aux);
2625 +
2626 +       if (use_aux) {
2627 +               // New frame so new aux ent
2628 +               // ??? Do we need this if non-ref ??? can we tell
2629 +               s->frame_aux = aux_q_new(ctx, run->dst->vb2_buf.index);
2630 +
2631 +               if (!s->frame_aux) {
2632 +                       v4l2_err(&dev->v4l2_dev,
2633 +                                "Failed to obtain aux storage for frame\n");
2634 +                       goto fail;
2635 +               }
2636 +
2637 +               de->frame_aux = aux_q_ref(ctx, s->frame_aux);
2638 +       }
2639 +
2640 +       if (de->dpbno_col != ~0U) {
2641 +               if (de->dpbno_col >= sh->num_active_dpb_entries) {
2642 +                       v4l2_err(&dev->v4l2_dev,
2643 +                                "Col ref index %d >= %d\n",
2644 +                                de->dpbno_col,
2645 +                                sh->num_active_dpb_entries);
2646 +               } else {
2647 +                       // Standard requires that the col pic is
2648 +                       // constant for the duration of the pic
2649 +                       // (text of collocated_ref_idx in H265-2 2018
2650 +                       // 7.4.7.1)
2651 +
2652 +                       // Spot the collocated ref in passing
2653 +                       de->col_aux = aux_q_ref(ctx,
2654 +                                               dpb_q_aux[de->dpbno_col]);
2655 +
2656 +                       if (!de->col_aux) {
2657 +                               v4l2_warn(&dev->v4l2_dev,
2658 +                                         "Missing DPB ent for col\n");
2659 +                               // Probably need to abort if this fails
2660 +                               // as P2 may explode on bad data
2661 +                               goto fail;
2662 +                       }
2663 +               }
2664 +       }
2665 +
2666 +       de->state = RPIVID_DECODE_PHASE1;
2667 +       return;
2668 +
2669 +fail:
2670 +       if (de)
2671 +               // Actual error reporting happens in Trigger
2672 +               de->state = s->frame_end ? RPIVID_DECODE_ERROR_DONE :
2673 +                                          RPIVID_DECODE_ERROR_CONTINUE;
2674 +}
2675 +
2676 +//////////////////////////////////////////////////////////////////////////////
2677 +// Phase 1 completion status, including PU and COEFF stream overflow
2678 +
2679 +// check_status() returns:
2680 +// -1  Phase 1 decode error (not complete and neither exhausted bit is set)
2681 +//  0  OK (RPI_CFSTATUS == RPI_CFNUM)
2682 +// >0  Out of space (bitmask of the STATUS_*_EXHAUSTED values below)
2683 +
2684 +#define STATUS_COEFF_EXHAUSTED 8
2685 +#define STATUS_PU_EXHAUSTED    16
2686 +
2687 +static int check_status(const struct rpivid_dev *const dev)
2688 +{
2689 +       const u32 cfstatus = apb_read(dev, RPI_CFSTATUS);
2690 +       const u32 cfnum = apb_read(dev, RPI_CFNUM);
2691 +       u32 status = apb_read(dev, RPI_STATUS);
2692 +
2693 +       // All three registers are sampled up front, before any is tested
2694 +
2695 +       // this is the definition of successful completion of phase 1
2696 +       // it assures that status register is zero and all blocks in each tile
2697 +       // have completed
2698 +       if (cfstatus == cfnum)
2699 +               return 0;       //No error
2700 +       // Not complete: keep only the overflow bits the caller can act on
2701 +       status &= (STATUS_PU_EXHAUSTED | STATUS_COEFF_EXHAUSTED);
2702 +       if (status)
2703 +               return status;
2704 +
2705 +       return -1;
2706 +}
2707 +
2708 +static void cb_phase2(struct rpivid_dev *const dev, void *v)
2709 +{
2710 +       struct rpivid_dec_env *const de = v;
2711 +       struct rpivid_ctx *const ctx = de->ctx;
2712 +
2713 +       xtrace_in(dev, de);
2714 +       // Phase 2 irq callback: return the capture buffer detached in cb_phase1
2715 +       v4l2_m2m_cap_buf_return(dev->m2m_dev, ctx->fh.m2m_ctx, de->frame_buf,
2716 +                               VB2_BUF_STATE_DONE);
2717 +       de->frame_buf = NULL;
2718 +
2719 +       /* Delete de before finish as finish might immediately trigger a reuse
2720 +        * of de
2721 +        */
2722 +       dec_env_delete(de);
2723 +       // If p2out was at its limit cb_phase1 skipped job finish - do it now
2724 +       if (atomic_add_return(-1, &ctx->p2out) >= RPIVID_P2BUF_COUNT - 1) {
2725 +               xtrace_fin(dev, de);
2726 +               v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
2727 +                                                VB2_BUF_STATE_DONE);
2728 +       }
2729 +
2730 +       xtrace_ok(dev, de);
2731 +}
2732 +
2733 +static void phase2_claimed(struct rpivid_dev *const dev, void *v)
2734 +{
2735 +       struct rpivid_dec_env *const de = v;
2736 +       unsigned int i;
2737 +
2738 +       xtrace_in(dev, de);
2739 +       // Same PU/coeff base+stride values that phase1_claimed programmed
2740 +       apb_write_vc_addr(dev, RPI_PURBASE, de->pu_base_vc);
2741 +       apb_write_vc_len(dev, RPI_PURSTRIDE, de->pu_stride);
2742 +       apb_write_vc_addr(dev, RPI_COEFFRBASE, de->coeff_base_vc);
2743 +       apb_write_vc_len(dev, RPI_COEFFRSTRIDE, de->coeff_stride);
2744 +       // Output frame: C base is the Y base plus frame_c_offset
2745 +       apb_write_vc_addr(dev, RPI_OUTYBASE, de->frame_addr);
2746 +       apb_write_vc_addr(dev, RPI_OUTCBASE,
2747 +                         de->frame_addr + de->frame_c_offset);
2748 +       apb_write_vc_len(dev, RPI_OUTYSTRIDE, de->frame_stride);
2749 +       apb_write_vc_len(dev, RPI_OUTCSTRIDE, de->frame_stride);
2750 +
2751 +       //    v4l2_info(&dev->v4l2_dev, "Frame: Y=%llx, C=%llx, Stride=%x\n",
2752 +       //              de->frame_addr, de->frame_addr + de->frame_c_offset,
2753 +       //              de->frame_stride);
2754 +       // Program all 16 reference slots (register blocks at 0x9000 + 16 * i)
2755 +       for (i = 0; i < 16; i++) {
2756 +               // Strides are in fact unused but fill in anyway
2757 +               apb_write_vc_addr(dev, 0x9000 + 16 * i, de->ref_addrs[i]);
2758 +               apb_write_vc_len(dev, 0x9004 + 16 * i, de->frame_stride);
2759 +               apb_write_vc_addr(dev, 0x9008 + 16 * i,
2760 +                                 de->ref_addrs[i] + de->frame_c_offset);
2761 +               apb_write_vc_len(dev, 0x900C + 16 * i, de->frame_stride);
2762 +       }
2763 +
2764 +       apb_write(dev, RPI_CONFIG2, de->rpi_config2);
2765 +       apb_write(dev, RPI_FRAMESIZE, de->rpi_framesize);
2766 +       apb_write(dev, RPI_CURRPOC, de->rpi_currpoc);
2767 +       //    v4l2_info(&dev->v4l2_dev, "Config2=%#x, FrameSize=%#x, POC=%#x\n",
2768 +       //    de->rpi_config2, de->rpi_framesize, de->rpi_currpoc);
2769 +
2770 +       // collocated reads/writes (base is written as 0 if no aux ent)
2771 +       apb_write_vc_len(dev, RPI_COLSTRIDE,
2772 +                        de->ctx->colmv_stride); // Read vals
2773 +       apb_write_vc_len(dev, RPI_MVSTRIDE,
2774 +                        de->ctx->colmv_stride); // Write vals
2775 +       apb_write_vc_addr(dev, RPI_MVBASE,
2776 +                         !de->frame_aux ? 0 : de->frame_aux->col.addr);
2777 +       apb_write_vc_addr(dev, RPI_COLBASE,
2778 +                         !de->col_aux ? 0 : de->col_aux->col.addr);
2779 +
2780 +       //v4l2_info(&dev->v4l2_dev,
2781 +       //         "Mv=%llx, Col=%llx, Stride=%x, Buf=%llx->%llx\n",
2782 +       //         de->rpi_mvbase, de->rpi_colbase, de->ctx->colmv_stride,
2783 +       //         de->ctx->colmvbuf.addr, de->ctx->colmvbuf.addr +
2784 +       //         de->ctx->colmvbuf.size);
2785 +       // Install cb_phase2 as the irq callback before the final register write
2786 +       rpivid_hw_irq_active2_irq(dev, &de->irq_ent, cb_phase2, de);
2787 +
2788 +       apb_write_final(dev, RPI_NUMROWS, de->pic_height_in_ctbs_y);
2789 +
2790 +       xtrace_ok(dev, de);
2791 +}
2792 +
2793 +static void phase1_claimed(struct rpivid_dev *const dev, void *v);
2794 +
2795 +static void phase1_thread(struct rpivid_dev *const dev, void *v)
2796 +{
2797 +       struct rpivid_dec_env *const de = v;
2798 +       struct rpivid_ctx *const ctx = de->ctx;
2799 +
2800 +       struct rpivid_gptr *const pu_gptr = ctx->pu_bufs + ctx->p2idx;
2801 +       struct rpivid_gptr *const coeff_gptr = ctx->coeff_bufs + ctx->p2idx;
2802 +
2803 +       xtrace_in(dev, de);
2804 +       // Queued by cb_phase1 on overflow: realloc whichever buffer ran out
2805 +       if (de->p1_status & STATUS_PU_EXHAUSTED) {
2806 +               if (gptr_realloc_new(dev, pu_gptr, next_size(pu_gptr->size))) {
2807 +                       v4l2_err(&dev->v4l2_dev,
2808 +                                "%s: PU realloc (%#x) failed\n",
2809 +                                __func__, pu_gptr->size);
2810 +                       goto fail;
2811 +               }
2812 +               v4l2_info(&dev->v4l2_dev, "%s: PU realloc (%#x) OK\n",
2813 +                         __func__, pu_gptr->size);
2814 +       }
2815 +
2816 +       if (de->p1_status & STATUS_COEFF_EXHAUSTED) {
2817 +               if (gptr_realloc_new(dev, coeff_gptr,
2818 +                                    next_size(coeff_gptr->size))) {
2819 +                       v4l2_err(&dev->v4l2_dev,
2820 +                                "%s: Coeff realloc (%#x) failed\n",
2821 +                                __func__, coeff_gptr->size);
2822 +                       goto fail;
2823 +               }
2824 +               v4l2_info(&dev->v4l2_dev, "%s: Coeff realloc (%#x) OK\n",
2825 +                         __func__, coeff_gptr->size);
2826 +       }
2827 +       // Re-issue phase 1 now that the buffers have been reallocated
2828 +       phase1_claimed(dev, de);
2829 +       xtrace_ok(dev, de);
2830 +       return;
2831 +
2832 +fail:
2833 +       dec_env_delete(de);
2834 +       xtrace_fin(dev, de);
2835 +       v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
2836 +                                        VB2_BUF_STATE_ERROR);
2837 +       xtrace_fail(dev, de);
2838 +}
2839 +
2840 +/* Phase 1 irq callback. Always called in irq context (this is good) */
2841 +static void cb_phase1(struct rpivid_dev *const dev, void *v)
2842 +{
2843 +       struct rpivid_dec_env *const de = v;
2844 +       struct rpivid_ctx *const ctx = de->ctx;
2845 +
2846 +       xtrace_in(dev, de);
2847 +       // <0 decode error, 0 complete, >0 PU/coeff buffer exhausted (bitmask)
2848 +       de->p1_status = check_status(dev);
2849 +       if (de->p1_status != 0) {
2850 +               v4l2_info(&dev->v4l2_dev, "%s: Post wait: %#x\n",
2851 +                         __func__, de->p1_status);
2852 +
2853 +               if (de->p1_status < 0)
2854 +                       goto fail;
2855 +
2856 +               /* Need to realloc - push onto a thread rather than IRQ */
2857 +               rpivid_hw_irq_active1_thread(dev, &de->irq_ent,
2858 +                                            phase1_thread, de);
2859 +               return;
2860 +       }
2861 +
2862 +       /* After the frame-buf is detached it must be returned but from
2863 +        * this point onward (phase2_claimed, cb_phase2) there are no error
2864 +        * paths so the return at the end of cb_phase2 is all that is needed
2865 +        */
2866 +       de->frame_buf = v4l2_m2m_cap_buf_detach(dev->m2m_dev, ctx->fh.m2m_ctx);
2867 +       if (!de->frame_buf) {
2868 +               v4l2_err(&dev->v4l2_dev, "%s: No detached buffer\n", __func__);
2869 +               goto fail;
2870 +       }
2871 +       // Step p2idx (index into pu_bufs/coeff_bufs), wrapping to 0
2872 +       ctx->p2idx =
2873 +               (ctx->p2idx + 1 >= RPIVID_P2BUF_COUNT) ? 0 : ctx->p2idx + 1;
2874 +
2875 +       // Enable the next setup if our Q isn't too big
2876 +       if (atomic_add_return(1, &ctx->p2out) < RPIVID_P2BUF_COUNT) {
2877 +               xtrace_fin(dev, de);
2878 +               v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
2879 +                                                VB2_BUF_STATE_DONE);
2880 +       }
2881 +
2882 +       rpivid_hw_irq_active2_claim(dev, &de->irq_ent, phase2_claimed, de);
2883 +
2884 +       xtrace_ok(dev, de);
2885 +       return;
2886 +
2887 +fail:
2888 +       dec_env_delete(de);
2889 +       xtrace_fin(dev, de);
2890 +       v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
2891 +                                        VB2_BUF_STATE_ERROR);
2892 +       xtrace_fail(dev, de);
2893 +}
2894 +
2895 +static void phase1_claimed(struct rpivid_dev *const dev, void *v)
2896 +{
2897 +       struct rpivid_dec_env *const de = v;
2898 +       struct rpivid_ctx *const ctx = de->ctx;
2899 +
2900 +       const struct rpivid_gptr * const pu_gptr = ctx->pu_bufs + ctx->p2idx;
2901 +       const struct rpivid_gptr * const coeff_gptr = ctx->coeff_bufs +
2902 +                                                     ctx->p2idx;
2903 +
2904 +       xtrace_in(dev, de);
2905 +       // Stride = buffer size / CTB rows, rounded down to a multiple of 64
2906 +       de->pu_base_vc = pu_gptr->addr;
2907 +       de->pu_stride =
2908 +               ALIGN_DOWN(pu_gptr->size / de->pic_height_in_ctbs_y, 64);
2909 +       // NOTE(review): both divisions assume pic_height_in_ctbs_y != 0
2910 +       de->coeff_base_vc = coeff_gptr->addr;
2911 +       de->coeff_stride =
2912 +               ALIGN_DOWN(coeff_gptr->size / de->pic_height_in_ctbs_y, 64);
2913 +
2914 +       apb_write_vc_addr(dev, RPI_PUWBASE, de->pu_base_vc);
2915 +       apb_write_vc_len(dev, RPI_PUWSTRIDE, de->pu_stride);
2916 +       apb_write_vc_addr(dev, RPI_COEFFWBASE, de->coeff_base_vc);
2917 +       apb_write_vc_len(dev, RPI_COEFFWSTRIDE, de->coeff_stride);
2918 +
2919 +       // Trigger command FIFO
2920 +       apb_write(dev, RPI_CFNUM, de->cmd_len);
2921 +
2922 +       // Claim irq (cb_phase1 is installed before the h/w is started)
2923 +       rpivid_hw_irq_active1_irq(dev, &de->irq_ent, cb_phase1, de);
2924 +
2925 +       // And start the h/w
2926 +       apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd_copy_gptr->addr);
2927 +
2928 +       xtrace_ok(dev, de);
2929 +}
2930 +
2931 +static void dec_state_delete(struct rpivid_ctx *const ctx)
2932 +{
2933 +       unsigned int i;
2934 +       struct rpivid_dec_state *const s = ctx->state;
2935 +
2936 +       if (!s)
2937 +               return; // No state allocated - nothing to do
2938 +       ctx->state = NULL;
2939 +       // State is now detached from ctx; release what it holds, then s
2940 +       free_ps_info(s);
2941 +       // Drop the aux refs held for the DPB entries and the current frame
2942 +       for (i = 0; i != HEVC_MAX_REFS; ++i)
2943 +               aux_q_release(ctx, &s->ref_aux[i]);
2944 +       aux_q_release(ctx, &s->frame_aux);
2945 +
2946 +       kfree(s);
2947 +}
2948 +
2949 +static void rpivid_h265_stop(struct rpivid_ctx *ctx)
2950 +{
2951 +       struct rpivid_dev *const dev = ctx->dev;
2952 +       unsigned int i;
2953 +
2954 +       v4l2_info(&dev->v4l2_dev, "%s\n", __func__);
2955 +       // Also used as the failure unwind of rpivid_h265_start()
2956 +       dec_env_uninit(ctx);
2957 +       dec_state_delete(ctx);
2958 +
2959 +       // dec_env & state must be killed before this to release the buffer to
2960 +       // the free pool
2961 +       aux_q_uninit(ctx);
2962 +       // Free all per-context gptr buffers (bit, cmd, PU and coeff)
2963 +       for (i = 0; i != ARRAY_SIZE(ctx->bitbufs); ++i)
2964 +               gptr_free(dev, ctx->bitbufs + i);
2965 +       for (i = 0; i != ARRAY_SIZE(ctx->cmdbufs); ++i)
2966 +               gptr_free(dev, ctx->cmdbufs + i);
2967 +       for (i = 0; i != ARRAY_SIZE(ctx->pu_bufs); ++i)
2968 +               gptr_free(dev, ctx->pu_bufs + i);
2969 +       for (i = 0; i != ARRAY_SIZE(ctx->coeff_bufs); ++i)
2970 +               gptr_free(dev, ctx->coeff_bufs + i);
2971 +}
2972 +
2973 +static int rpivid_h265_start(struct rpivid_ctx *ctx)
2974 +{
2975 +       struct rpivid_dev *const dev = ctx->dev;
2976 +       unsigned int i;
2977 +       // Local copies of the capture size, sanitised below for buf sizing
2978 +       unsigned int w = ctx->dst_fmt.width;
2979 +       unsigned int h = ctx->dst_fmt.height;
2980 +       unsigned int wxh;
2981 +       size_t pu_alloc;
2982 +       size_t coeff_alloc;
2983 +
2984 +       // Generate a sanitised WxH for memory alloc
2985 +       // Assume HD (1920x1088) if unset, clamp each dimension to 4096
2986 +       if (w == 0)
2987 +               w = 1920;
2988 +       if (w > 4096)
2989 +               w = 4096;
2990 +       if (h == 0)
2991 +               h = 1088; // default the height (was wrongly assigned to w)
2992 +       if (h > 4096)
2993 +               h = 4096;
2994 +       wxh = w * h;
2995 +
2996 +       v4l2_info(&dev->v4l2_dev, "%s: (%dx%d)\n", __func__,
2997 +                 ctx->dst_fmt.width, ctx->dst_fmt.height);
2998 +       // Any failure below unwinds via rpivid_h265_stop() and yields -ENOMEM
2999 +       ctx->dec0 = NULL;
3000 +       ctx->state = kzalloc(sizeof(*ctx->state), GFP_KERNEL);
3001 +       if (!ctx->state) {
3002 +               v4l2_err(&dev->v4l2_dev, "Failed to allocate decode state\n");
3003 +               goto fail;
3004 +       }
3005 +
3006 +       if (dec_env_init(ctx) != 0) {
3007 +               v4l2_err(&dev->v4l2_dev, "Failed to allocate decode envs\n");
3008 +               goto fail;
3009 +       }
3010 +
3011 +       // 16k is plenty for most purposes but we will realloc if needed
3012 +       for (i = 0; i != ARRAY_SIZE(ctx->cmdbufs); ++i) {
3013 +               if (gptr_alloc(dev, ctx->cmdbufs + i, 0x4000,
3014 +                              DMA_ATTR_FORCE_CONTIGUOUS))
3015 +                       goto fail;
3016 +       }
3017 +
3018 +       // Finger in the air PU & Coeff alloc
3019 +       // Will be realloced if too small
3020 +       coeff_alloc = round_up_size(wxh);
3021 +       pu_alloc = round_up_size(wxh / 4);
3022 +       for (i = 0; i != ARRAY_SIZE(ctx->pu_bufs); ++i) {
3023 +               // Don't actually need a kernel mapping here
3024 +               if (gptr_alloc(dev, ctx->pu_bufs + i, pu_alloc,
3025 +                              DMA_ATTR_FORCE_CONTIGUOUS |
3026 +                                       DMA_ATTR_NO_KERNEL_MAPPING))
3027 +                       goto fail;
3028 +               if (gptr_alloc(dev, ctx->coeff_bufs + i, coeff_alloc,
3029 +                              DMA_ATTR_FORCE_CONTIGUOUS |
3030 +                                       DMA_ATTR_NO_KERNEL_MAPPING))
3031 +                       goto fail;
3032 +       }
3033 +       aux_q_init(ctx);
3034 +
3035 +       return 0;
3036 +
3037 +fail:
3038 +       rpivid_h265_stop(ctx);
3039 +       return -ENOMEM;
3040 +}
3041 +
3042 +static void rpivid_h265_trigger(struct rpivid_ctx *ctx)
3043 +{
3044 +       struct rpivid_dev *const dev = ctx->dev;
3045 +       struct rpivid_dec_env *const de = ctx->dec0;
3046 +
3047 +       xtrace_in(dev, de);
3048 +       // A missing decode env is treated the same as ERROR_CONTINUE
3049 +       switch (!de ? RPIVID_DECODE_ERROR_CONTINUE : de->state) {
3050 +       case RPIVID_DECODE_SLICE_START:
3051 +               de->state = RPIVID_DECODE_SLICE_CONTINUE;
3052 +       /* FALLTHRU */
3053 +       case RPIVID_DECODE_SLICE_CONTINUE:
3054 +               v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
3055 +                                                VB2_BUF_STATE_DONE);
3056 +               break;
3057 +       default:
3058 +               v4l2_err(&dev->v4l2_dev, "%s: Unexpected state: %d\n", __func__,
3059 +                        de->state);
3060 +       /* FALLTHRU */
3061 +       case RPIVID_DECODE_ERROR_DONE:
3062 +               ctx->dec0 = NULL; // Unlike ERROR_CONTINUE, the env is dropped
3063 +               dec_env_delete(de);
3064 +       /* FALLTHRU */
3065 +       case RPIVID_DECODE_ERROR_CONTINUE:
3066 +               xtrace_fin(dev, de);
3067 +               v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
3068 +                                                VB2_BUF_STATE_ERROR);
3069 +               break;
3070 +       case RPIVID_DECODE_PHASE1:
3071 +               ctx->dec0 = NULL; // ctx lets go of de before phase 1 is claimed
3072 +               rpivid_hw_irq_active1_claim(dev, &de->irq_ent, phase1_claimed,
3073 +                                           de);
3074 +               break;
3075 +       }
3076 +
3077 +       xtrace_ok(dev, de);
3078 +}
3079 +
3080 +struct rpivid_dec_ops rpivid_dec_ops_h265 = { // H265 decoder entry points
3081 +       .setup = rpivid_h265_setup,
3082 +       .start = rpivid_h265_start,
3083 +       .stop = rpivid_h265_stop,
3084 +       .trigger = rpivid_h265_trigger,
3085 +};
3086 --- /dev/null
3087 +++ b/drivers/staging/media/rpivid/rpivid_hw.c
3088 @@ -0,0 +1,321 @@
3089 +// SPDX-License-Identifier: GPL-2.0
3090 +/*
3091 + * Raspberry Pi HEVC driver
3092 + *
3093 + * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
3094 + *
3095 + * Based on the Cedrus VPU driver, that is:
3096 + *
3097 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
3098 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
3099 + * Copyright (C) 2018 Bootlin
3100 + */
3101 +#include <linux/clk.h>
3102 +#include <linux/component.h>
3103 +#include <linux/dma-mapping.h>
3104 +#include <linux/interrupt.h>
3105 +#include <linux/io.h>
3106 +#include <linux/of_reserved_mem.h>
3107 +#include <linux/of_device.h>
3108 +#include <linux/of_platform.h>
3109 +#include <linux/platform_device.h>
3110 +#include <linux/regmap.h>
3111 +#include <linux/reset.h>
3112 +
3113 +#include <media/videobuf2-core.h>
3114 +#include <media/v4l2-mem2mem.h>
3115 +
3116 +#include "rpivid.h"
3117 +#include "rpivid_hw.h"
3118 +
3119 +static void pre_irq(struct rpivid_dev *dev, struct rpivid_hw_irq_ent *ient,
3120 +                   rpivid_irq_callback cb, void *v,
3121 +                   struct rpivid_hw_irq_ctrl *ictl)
3122 +{
3123 +       unsigned long flags;
3124 +
3125 +       if (ictl->irq) { /* only one IRQ entry may be registered at a time */
3126 +               v4l2_err(&dev->v4l2_dev, "Attempt to claim IRQ when already claimed\n");
3127 +               return;
3128 +       }
3129 +       /* Record the callback that do_irq() runs once the interrupt fires */
3130 +       ient->cb = cb;
3131 +       ient->v = v;
3132 +
3133 +       // Publish under the lock do_irq() takes; not sure it is actually required
3134 +       spin_lock_irqsave(&ictl->lock, flags);
3135 +       ictl->irq = ient;
3136 +       spin_unlock_irqrestore(&ictl->lock, flags);
3137 +}
3138 +
3139 +static void sched_claim(struct rpivid_dev * const dev,
3140 +                       struct rpivid_hw_irq_ctrl * const ictl)
3141 +{
3142 +       for (;;) {
3143 +               struct rpivid_hw_irq_ent *ient = NULL;
3144 +               unsigned long flags;
3145 +
3146 +               spin_lock_irqsave(&ictl->lock, flags);
3147 +               /* Drop one hold; take a claim only if idle and no irq is armed */
3148 +               if (--ictl->no_sched <= 0 && !ictl->irq) {
3149 +                       ient = ictl->claim;
3150 +                       if (ient) {
3151 +                               ictl->claim = ient->next;
3152 +                               ictl->no_sched = 1;
3153 +                       }
3154 +               }
3155 +
3156 +               spin_unlock_irqrestore(&ictl->lock, flags);
3157 +
3158 +               if (!ient)
3159 +                       break;
3160 +               /* Run the dequeued claim, then loop to release its hold too */
3161 +               ient->cb(dev, ient->v);
3162 +       }
3163 +}
3164 +
3165 +/* Arm cb for do_thread(); only call from this ictl's own IRQ cb so no lock required */
3166 +static void pre_thread(struct rpivid_dev *dev,
3167 +                      struct rpivid_hw_irq_ent *ient,
3168 +                      rpivid_irq_callback cb, void *v,
3169 +                      struct rpivid_hw_irq_ctrl *ictl)
3170 +{
3171 +       ient->cb = cb;
3172 +       ient->v = v;
3173 +       ictl->irq = ient;
3174 +       ictl->thread_reqed = true; /* makes rpivid_irq_irq() wake the thread */
3175 +       ictl->no_sched++; /* dropped by the sched_claim() call in do_thread() */
3176 +}
3177 +
3178 +// Called in irq context: runs the registered IRQ entry, if there is one
3179 +static void do_irq(struct rpivid_dev * const dev,
3180 +                  struct rpivid_hw_irq_ctrl * const ictl)
3181 +{
3182 +       struct rpivid_hw_irq_ent *ient;
3183 +       unsigned long flags;
3184 +
3185 +       spin_lock_irqsave(&ictl->lock, flags);
3186 +       ient = ictl->irq;
3187 +       if (ient) {
3188 +               ictl->no_sched++; /* hold off claims while the cb runs */
3189 +               ictl->irq = NULL;
3190 +       }
3191 +       spin_unlock_irqrestore(&ictl->lock, flags);
3192 +
3193 +       if (ient) {
3194 +               ient->cb(dev, ient->v);
3195 +               /* Drops the hold taken above and runs any claim now allowed */
3196 +               sched_claim(dev, ictl);
3197 +       }
3198 +}
3199 +
3200 +static void do_claim(struct rpivid_dev * const dev,
3201 +                    struct rpivid_hw_irq_ent *ient,
3202 +                    const rpivid_irq_callback cb, void * const v,
3203 +                    struct rpivid_hw_irq_ctrl * const ictl)
3204 +{
3205 +       unsigned long flags;
3206 +
3207 +       ient->next = NULL;
3208 +       ient->cb = cb;
3209 +       ient->v = v;
3210 +
3211 +       spin_lock_irqsave(&ictl->lock, flags);
3212 +       /* Either queue the entry (ient becomes NULL) or claim immediately */
3213 +       if (ictl->claim) {
3214 +               // If we have a Q then add to end
3215 +               ictl->tail->next = ient;
3216 +               ictl->tail = ient;
3217 +               ient = NULL;
3218 +       } else if (ictl->no_sched || ictl->irq) {
3219 +               // Empty Q but other activity in progress so Q
3220 +               ictl->claim = ient;
3221 +               ictl->tail = ient;
3222 +               ient = NULL;
3223 +       } else {
3224 +               // Nothing else going on - schedule immediately and
3225 +               // prevent anything else scheduling claims
3226 +               ictl->no_sched = 1;
3227 +       }
3228 +
3229 +       spin_unlock_irqrestore(&ictl->lock, flags);
3230 +       /* Immediate path only: run the cb, then let sched_claim() continue */
3231 +       if (ient) {
3232 +               ient->cb(dev, ient->v);
3233 +
3234 +               sched_claim(dev, ictl);
3235 +       }
3236 +}
3237 +
3238 +static void ictl_init(struct rpivid_hw_irq_ctrl * const ictl)
3239 +{
3240 +       spin_lock_init(&ictl->lock);
3241 +       ictl->claim = NULL;
3242 +       ictl->tail = NULL;
3243 +       ictl->irq = NULL;
3244 +       ictl->no_sched = 0;
3245 +}
3246 +
3247 +static void ictl_uninit(struct rpivid_hw_irq_ctrl * const ictl)
3248 +{
3249 +       // Nothing to do
3250 +}
3251 +
3252 +#if !OPT_DEBUG_POLL_IRQ
3253 +static irqreturn_t rpivid_irq_irq(int irq, void *data)
3254 +{
3255 +       struct rpivid_dev * const dev = data;
3256 +       __u32 ictrl;
3257 +
3258 +       ictrl = irq_read(dev, ARG_IC_ICTRL);
3259 +       if (!(ictrl & ARG_IC_ICTRL_ALL_IRQ_MASK)) {
3260 +               v4l2_warn(&dev->v4l2_dev, "IRQ but no IRQ bits set\n");
3261 +               return IRQ_NONE;
3262 +       }
3263 +
3264 +       // Cancel any/all irqs: write the value back with the reserved bits zeroed
3265 +       irq_write(dev, ARG_IC_ICTRL, ictrl & ~ARG_IC_ICTRL_SET_ZERO_MASK);
3266 +
3267 +       // Service Active2 before Active1 so Phase 1 can transition to Phase 2
3268 +       // without delay
3269 +       if (ictrl & ARG_IC_ICTRL_ACTIVE2_INT_SET)
3270 +               do_irq(dev, &dev->ic_active2);
3271 +       if (ictrl & ARG_IC_ICTRL_ACTIVE1_INT_SET)
3272 +               do_irq(dev, &dev->ic_active1);
3273 +       /* thread_reqed is set by pre_thread() when a cb wants thread context */
3274 +       return dev->ic_active1.thread_reqed || dev->ic_active2.thread_reqed ?
3275 +               IRQ_WAKE_THREAD : IRQ_HANDLED;
3276 +}
3277 +
3278 +static void do_thread(struct rpivid_dev * const dev,
3279 +                     struct rpivid_hw_irq_ctrl *const ictl)
3280 +{
3281 +       unsigned long flags;
3282 +       struct rpivid_hw_irq_ent *ient = NULL;
3283 +
3284 +       spin_lock_irqsave(&ictl->lock, flags);
3285 +       /* Pick up the entry armed by pre_thread(), if a thread was requested */
3286 +       if (ictl->thread_reqed) {
3287 +               ient = ictl->irq;
3288 +               ictl->thread_reqed = false;
3289 +               ictl->irq = NULL;
3290 +       }
3291 +
3292 +       spin_unlock_irqrestore(&ictl->lock, flags);
3293 +
3294 +       if (ient) {
3295 +               ient->cb(dev, ient->v);
3296 +               /* Drops the no_sched count raised in pre_thread() */
3297 +               sched_claim(dev, ictl);
3298 +       }
3299 +}
3300 +
3301 +static irqreturn_t rpivid_irq_thread(int irq, void *data)
3302 +{
3303 +       struct rpivid_dev * const dev = data;
3304 +
3305 +       do_thread(dev, &dev->ic_active1);
3306 +       do_thread(dev, &dev->ic_active2);
3307 +
3308 +       return IRQ_HANDLED;
3309 +}
3310 +#endif
3311 +
3312 +/* May only be called from Active1 CB
3313 + * IRQs should not be expected until execution continues in the cb
3314 + */
3315 +void rpivid_hw_irq_active1_thread(struct rpivid_dev *dev,
3316 +                                 struct rpivid_hw_irq_ent *ient,
3317 +                                 rpivid_irq_callback thread_cb, void *ctx)
3318 +{
3319 +       pre_thread(dev, ient, thread_cb, ctx, &dev->ic_active1);
3320 +}
3321 +
3322 +void rpivid_hw_irq_active1_claim(struct rpivid_dev *dev,
3323 +                                struct rpivid_hw_irq_ent *ient,
3324 +                                rpivid_irq_callback ready_cb, void *ctx)
3325 +{
3326 +       do_claim(dev, ient, ready_cb, ctx, &dev->ic_active1);
3327 +}
3328 +
3329 +void rpivid_hw_irq_active1_irq(struct rpivid_dev *dev,
3330 +                              struct rpivid_hw_irq_ent *ient,
3331 +                              rpivid_irq_callback irq_cb, void *ctx)
3332 +{
3333 +       pre_irq(dev, ient, irq_cb, ctx, &dev->ic_active1);
3334 +}
3335 +
3336 +void rpivid_hw_irq_active2_claim(struct rpivid_dev *dev,
3337 +                                struct rpivid_hw_irq_ent *ient,
3338 +                                rpivid_irq_callback ready_cb, void *ctx)
3339 +{
3340 +       do_claim(dev, ient, ready_cb, ctx, &dev->ic_active2);
3341 +}
3342 +
3343 +void rpivid_hw_irq_active2_irq(struct rpivid_dev *dev,
3344 +                              struct rpivid_hw_irq_ent *ient,
3345 +                              rpivid_irq_callback irq_cb, void *ctx)
3346 +{
3347 +       pre_irq(dev, ient, irq_cb, ctx, &dev->ic_active2);
3348 +}
3349 +
3350 +int rpivid_hw_probe(struct rpivid_dev *dev)
3351 +{
3352 +       struct resource *res;
3353 +       __u32 irq_stat;
3354 +       int irq_dec;
3355 +       int ret = 0;
3356 +
3357 +       ictl_init(&dev->ic_active1);
3358 +       ictl_init(&dev->ic_active2);
3359 +
3360 +       res = platform_get_resource_byname(dev->pdev, IORESOURCE_MEM, "intc");
3361 +       if (!res)
3362 +               return -ENODEV;
3363 +
3364 +       dev->base_irq = devm_ioremap(dev->dev, res->start, resource_size(res));
3365 +       if (IS_ERR(dev->base_irq))
3366 +               return PTR_ERR(dev->base_irq);
3367 +
3368 +       res = platform_get_resource_byname(dev->pdev, IORESOURCE_MEM, "hevc");
3369 +       if (!res)
3370 +               return -ENODEV;
3371 +
3372 +       dev->base_h265 = devm_ioremap(dev->dev, res->start, resource_size(res));
3373 +       if (IS_ERR(dev->base_h265))
3374 +               return PTR_ERR(dev->base_h265);
3375 +
3376 +       dev->clock = devm_clk_get(&dev->pdev->dev, "hevc");
3377 +       if (IS_ERR(dev->clock))
3378 +               return PTR_ERR(dev->clock);
3379 +
3380 +       // Disable IRQs & reset anything pending
3381 +       irq_write(dev, 0,
3382 +                 ARG_IC_ICTRL_ACTIVE1_EN_SET | ARG_IC_ICTRL_ACTIVE2_EN_SET);
3383 +       irq_stat = irq_read(dev, 0);
3384 +       irq_write(dev, 0, irq_stat);
3385 +
3386 +#if !OPT_DEBUG_POLL_IRQ
3387 +       irq_dec = platform_get_irq(dev->pdev, 0);
3388 +       if (irq_dec <= 0)
3389 +               return irq_dec;
3390 +       ret = devm_request_threaded_irq(dev->dev, irq_dec,
3391 +                                       rpivid_irq_irq,
3392 +                                       rpivid_irq_thread,
3393 +                                       0, dev_name(dev->dev), dev);
3394 +       if (ret) {
3395 +               dev_err(dev->dev, "Failed to request IRQ - %d\n", ret);
3396 +
3397 +               return ret;
3398 +       }
3399 +#endif
3400 +       return ret;
3401 +}
3402 +
3403 +void rpivid_hw_remove(struct rpivid_dev *dev)
3404 +{
3405 +       // IRQ auto freed on unload so no need to do it here
3406 +       ictl_uninit(&dev->ic_active1);
3407 +       ictl_uninit(&dev->ic_active2);
3408 +}
3409 +
3410 --- /dev/null
3411 +++ b/drivers/staging/media/rpivid/rpivid_hw.h
3412 @@ -0,0 +1,300 @@
3413 +/* SPDX-License-Identifier: GPL-2.0 */
3414 +/*
3415 + * Raspberry Pi HEVC driver
3416 + *
3417 + * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
3418 + *
3419 + * Based on the Cedrus VPU driver, that is:
3420 + *
3421 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
3422 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
3423 + * Copyright (C) 2018 Bootlin
3424 + */
3425 +
3426 +#ifndef _RPIVID_HW_H_
3427 +#define _RPIVID_HW_H_
3428 +
3429 +struct rpivid_hw_irq_ent {
3430 +       struct rpivid_hw_irq_ent *next;
3431 +       rpivid_irq_callback cb;
3432 +       void *v;
3433 +};
3434 +
3435 +/* Phase 1 Register offsets */
3436 +
3437 +#define RPI_SPS0 0
3438 +#define RPI_SPS1 4
3439 +#define RPI_PPS 8
3440 +#define RPI_SLICE 12
3441 +#define RPI_TILESTART 16
3442 +#define RPI_TILEEND 20
3443 +#define RPI_SLICESTART 24
3444 +#define RPI_MODE 28
3445 +#define RPI_LEFT0 32
3446 +#define RPI_LEFT1 36
3447 +#define RPI_LEFT2 40
3448 +#define RPI_LEFT3 44
3449 +#define RPI_QP 48
3450 +#define RPI_CONTROL 52
3451 +#define RPI_STATUS 56
3452 +#define RPI_VERSION 60
3453 +#define RPI_BFBASE 64
3454 +#define RPI_BFNUM 68
3455 +#define RPI_BFCONTROL 72
3456 +#define RPI_BFSTATUS 76
3457 +#define RPI_PUWBASE 80
3458 +#define RPI_PUWSTRIDE 84
3459 +#define RPI_COEFFWBASE 88
3460 +#define RPI_COEFFWSTRIDE 92
3461 +#define RPI_SLICECMDS 96
3462 +#define RPI_BEGINTILEEND 100
3463 +#define RPI_TRANSFER 104
3464 +#define RPI_CFBASE 108
3465 +#define RPI_CFNUM 112
3466 +#define RPI_CFSTATUS 116
3467 +
3468 +/* Phase 2 Register offsets */
3469 +
3470 +#define RPI_PURBASE 0x8000
3471 +#define RPI_PURSTRIDE 0x8004
3472 +#define RPI_COEFFRBASE 0x8008
3473 +#define RPI_COEFFRSTRIDE 0x800C
3474 +#define RPI_NUMROWS 0x8010
3475 +#define RPI_CONFIG2 0x8014
3476 +#define RPI_OUTYBASE 0x8018
3477 +#define RPI_OUTYSTRIDE 0x801C
3478 +#define RPI_OUTCBASE 0x8020
3479 +#define RPI_OUTCSTRIDE 0x8024
3480 +#define RPI_STATUS2 0x8028
3481 +#define RPI_FRAMESIZE 0x802C
3482 +#define RPI_MVBASE 0x8030
3483 +#define RPI_MVSTRIDE 0x8034
3484 +#define RPI_COLBASE 0x8038
3485 +#define RPI_COLSTRIDE 0x803C
3486 +#define RPI_CURRPOC 0x8040
3487 +
3488 +/*
3489 + * Write a general register value
3490 + * Order is unimportant
3491 + */
3492 +static inline void apb_write(const struct rpivid_dev * const dev,
3493 +                            const unsigned int offset, const u32 val)
3494 +{
3495 +       writel_relaxed(val, dev->base_h265 + offset);
3496 +}
3497 +
3498 +/* Write the final register value that actually starts the phase */
3499 +static inline void apb_write_final(const struct rpivid_dev * const dev,
3500 +                                  const unsigned int offset, const u32 val)
3501 +{
3502 +       writel(val, dev->base_h265 + offset);
3503 +}
3504 +
3505 +static inline u32 apb_read(const struct rpivid_dev * const dev,
3506 +                          const unsigned int offset)
3507 +{
3508 +       return readl(dev->base_h265 + offset);
3509 +}
3510 +
3511 +static inline void irq_write(const struct rpivid_dev * const dev,
3512 +                            const unsigned int offset, const u32 val)
3513 +{
3514 +       writel(val, dev->base_irq + offset);
3515 +}
3516 +
3517 +static inline u32 irq_read(const struct rpivid_dev * const dev,
3518 +                          const unsigned int offset)
3519 +{
3520 +       return readl(dev->base_irq + offset);
3521 +}
3522 +
3523 +static inline void apb_write_vc_addr(const struct rpivid_dev * const dev,
3524 +                                    const unsigned int offset,
3525 +                                    const dma_addr_t a)
3526 +{
3527 +       apb_write(dev, offset, (u32)(a >> 6));
3528 +}
3529 +
3530 +static inline void apb_write_vc_addr_final(const struct rpivid_dev * const dev,
3531 +                                          const unsigned int offset,
3532 +                                          const dma_addr_t a)
3533 +{
3534 +       apb_write_final(dev, offset, (u32)(a >> 6));
3535 +}
3536 +
3537 +static inline void apb_write_vc_len(const struct rpivid_dev * const dev,
3538 +                                   const unsigned int offset,
3539 +                                   const unsigned int x)
3540 +{
3541 +       apb_write(dev, offset, (x + 63) >> 6);
3542 +}
3543 +
3544 +/* *ARG_IC_ICTRL - Interrupt control for ARGON Core*
3545 + * Offset (byte space) = 40'h2b10000
3546 + * Physical Address (byte space) = 40'h7eb10000
3547 + * Verilog Macro Address = `ARG_IC_REG_START + `ARGON_INTCTRL_ICTRL
3548 + * Reset Value = 32'b100x100x_100xxxxx_xxxxxxx0_x100x100
3549 + * Access = RW (32-bit only)
3550 + * Interrupt control logic for ARGON Core.
3551 + */
3552 +#define ARG_IC_ICTRL 0
3553 +
3554 +/* acc=LWC ACTIVE1_INT FIELD ACCESS: LWC
3555 + *
3556 + * Interrupt 1
3557 + * This is set and held when an hevc_active1 interrupt edge is detected
3558 + * The polarity of the edge is set by the ACTIVE1_EDGE field
3559 + * Write a 1 to this bit to clear down the latched interrupt
3560 + * The latched interrupt is only enabled out onto the interrupt line if
3561 + * ACTIVE1_EN is set
3562 + * Reset value is *0* decimal.
3563 + */
3564 +#define ARG_IC_ICTRL_ACTIVE1_INT_SET           BIT(0)
3565 +
3566 +/* ACTIVE1_EDGE Sets the polarity of the interrupt edge detection logic
3567 + * This logic detects edges of the hevc_active1 line from the argon core
3568 + * 0 = negedge, 1 = posedge
3569 + * Reset value is *0* decimal.
3570 + */
3571 +#define ARG_IC_ICTRL_ACTIVE1_EDGE_SET          BIT(1)
3572 +
3573 +/* ACTIVE1_EN Enables ACTIVE1_INT out onto the argon interrupt line.
3574 + * If this isn't set, the interrupt logic will work but no interrupt will be
3575 + * sent to the interrupt controller
3576 + * Reset value is *1* decimal.
3577 + *
3578 + * [JC] The above appears to be a lie - if unset then b0 is never set
3579 + */
3580 +#define ARG_IC_ICTRL_ACTIVE1_EN_SET            BIT(2)
3581 +
3582 +/* acc=RO ACTIVE1_STATUS FIELD ACCESS: RO
3583 + *
3584 + * The current status of the hevc_active1 signal
3585 + */
3586 +#define ARG_IC_ICTRL_ACTIVE1_STATUS_SET                BIT(3)
3587 +
3588 +/* acc=LWC ACTIVE2_INT FIELD ACCESS: LWC
3589 + *
3590 + * Interrupt 2
3591 + * This is set and held when an hevc_active2 interrupt edge is detected
3592 + * The polarity of the edge is set by the ACTIVE2_EDGE field
3593 + * Write a 1 to this bit to clear down the latched interrupt
3594 + * The latched interrupt is only enabled out onto the interrupt line if
3595 + * ACTIVE2_EN is set
3596 + * Reset value is *0* decimal.
3597 + */
3598 +#define ARG_IC_ICTRL_ACTIVE2_INT_SET           BIT(4)
3599 +
3600 +/* ACTIVE2_EDGE Sets the polarity of the interrupt edge detection logic
3601 + * This logic detects edges of the hevc_active2 line from the argon core
3602 + * 0 = negedge, 1 = posedge
3603 + * Reset value is *0* decimal.
3604 + */
3605 +#define ARG_IC_ICTRL_ACTIVE2_EDGE_SET          BIT(5)
3606 +
3607 +/* ACTIVE2_EN Enables ACTIVE2_INT out onto the argon interrupt line.
3608 + * If this isn't set, the interrupt logic will work but no interrupt will be
3609 + * sent to the interrupt controller
3610 + * Reset value is *1* decimal.
3611 + */
3612 +#define ARG_IC_ICTRL_ACTIVE2_EN_SET            BIT(6)
3613 +
3614 +/* acc=RO ACTIVE2_STATUS FIELD ACCESS: RO
3615 + *
3616 + * The current status of the hevc_active2 signal
3617 + */
3618 +#define ARG_IC_ICTRL_ACTIVE2_STATUS_SET                BIT(7)
3619 +
3620 +/* TEST_INT Forces the argon int high for test purposes.
3621 + * Reset value is *0* decimal.
3622 + */
3623 +#define ARG_IC_ICTRL_TEST_INT                  BIT(8)
3624 +#define ARG_IC_ICTRL_SPARE                     BIT(9)
3625 +
3626 +/* acc=RO VP9_INTERRUPT_STATUS FIELD ACCESS: RO
3627 + *
3628 + * The current status of the vp9_interrupt signal
3629 + */
3630 +#define ARG_IC_ICTRL_VP9_INTERRUPT_STATUS      BIT(10)
3631 +
3632 +/* AIO_INT_ENABLE 1 = Or the AIO int in with the Argon int so the VPU can see
3633 + * it
3634 + * 0 = the AIO int is masked. (It should still be connected to the GIC though).
3635 + */
3636 +#define ARG_IC_ICTRL_AIO_INT_ENABLE            BIT(20)
3637 +#define ARG_IC_ICTRL_H264_ACTIVE_INT           BIT(21)
3638 +#define ARG_IC_ICTRL_H264_ACTIVE_EDGE          BIT(22)
3639 +#define ARG_IC_ICTRL_H264_ACTIVE_EN            BIT(23)
3640 +#define ARG_IC_ICTRL_H264_ACTIVE_STATUS                BIT(24)
3641 +#define ARG_IC_ICTRL_H264_INTERRUPT_INT                BIT(25)
3642 +#define ARG_IC_ICTRL_H264_INTERRUPT_EDGE       BIT(26)
3643 +#define ARG_IC_ICTRL_H264_INTERRUPT_EN         BIT(27)
3644 +
3645 +/* acc=RO H264_INTERRUPT_STATUS FIELD ACCESS: RO
3646 + *
3647 + * The current status of the h264_interrupt signal
3648 + */
3649 +#define ARG_IC_ICTRL_H264_INTERRUPT_STATUS     BIT(28)
3650 +
3651 +/* acc=LWC VP9_INTERRUPT_INT FIELD ACCESS: LWC
3652 + *
3653 + * Interrupt 1
3654 + * This is set and held when a vp9_interrupt edge is detected
3655 + * The polarity of the edge is set by the VP9_INTERRUPT_EDGE field
3656 + * Write a 1 to this bit to clear down the latched interrupt
3657 + * The latched interrupt is only enabled out onto the interrupt line if
3658 + * VP9_INTERRUPT_EN is set
3659 + * Reset value is *0* decimal.
3660 + */
3661 +#define ARG_IC_ICTRL_VP9_INTERRUPT_INT         BIT(29)
3662 +
3663 +/* VP9_INTERRUPT_EDGE Sets the polarity of the interrupt edge detection logic
3664 + * This logic detects edges of the vp9_interrupt line from the argon h264 core
3665 + * 0 = negedge, 1 = posedge
3666 + * Reset value is *0* decimal.
3667 + */
3668 +#define ARG_IC_ICTRL_VP9_INTERRUPT_EDGE                BIT(30)
3669 +
3670 +/* VP9_INTERRUPT_EN Enables VP9_INTERRUPT_INT out onto the argon interrupt line.
3671 + * If this isn't set, the interrupt logic will work but no interrupt will be
3672 + * sent to the interrupt controller
3673 + * Reset value is *1* decimal.
3674 + */
3675 +#define ARG_IC_ICTRL_VP9_INTERRUPT_EN          BIT(31)
3676 +
3677 +/* Bits 19:12, 11 reserved - read ?, write 0 */
3678 +#define ARG_IC_ICTRL_SET_ZERO_MASK             ((0xff << 12) | BIT(11))
3679 +
3680 +/* All IRQ bits */
3681 +#define ARG_IC_ICTRL_ALL_IRQ_MASK   (\
3682 +               ARG_IC_ICTRL_VP9_INTERRUPT_INT  |\
3683 +               ARG_IC_ICTRL_H264_INTERRUPT_INT |\
3684 +               ARG_IC_ICTRL_ACTIVE1_INT_SET    |\
3685 +               ARG_IC_ICTRL_ACTIVE2_INT_SET)
3686 +
3687 +/* Auto release once all CBs called */
3688 +void rpivid_hw_irq_active1_claim(struct rpivid_dev *dev,
3689 +                                struct rpivid_hw_irq_ent *ient,
3690 +                                rpivid_irq_callback ready_cb, void *ctx);
3691 +/* May only be called in claim cb */
3692 +void rpivid_hw_irq_active1_irq(struct rpivid_dev *dev,
3693 +                              struct rpivid_hw_irq_ent *ient,
3694 +                              rpivid_irq_callback irq_cb, void *ctx);
3695 +/* May only be called in irq cb */
3696 +void rpivid_hw_irq_active1_thread(struct rpivid_dev *dev,
3697 +                                 struct rpivid_hw_irq_ent *ient,
3698 +                                 rpivid_irq_callback thread_cb, void *ctx);
3699 +
3700 +/* Auto release once all CBs called */
3701 +void rpivid_hw_irq_active2_claim(struct rpivid_dev *dev,
3702 +                                struct rpivid_hw_irq_ent *ient,
3703 +                                rpivid_irq_callback ready_cb, void *ctx);
3704 +/* May only be called in claim cb */
3705 +void rpivid_hw_irq_active2_irq(struct rpivid_dev *dev,
3706 +                              struct rpivid_hw_irq_ent *ient,
3707 +                              rpivid_irq_callback irq_cb, void *ctx);
3708 +
3709 +int rpivid_hw_probe(struct rpivid_dev *dev);
3710 +void rpivid_hw_remove(struct rpivid_dev *dev);
3711 +
3712 +#endif
3713 --- /dev/null
3714 +++ b/drivers/staging/media/rpivid/rpivid_video.c
3715 @@ -0,0 +1,593 @@
3716 +// SPDX-License-Identifier: GPL-2.0
3717 +/*
3718 + * Raspberry Pi HEVC driver
3719 + *
3720 + * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
3721 + *
3722 + * Based on the Cedrus VPU driver, that is:
3723 + *
3724 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
3725 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
3726 + * Copyright (C) 2018 Bootlin
3727 + */
3728 +
3729 +#include <media/videobuf2-dma-contig.h>
3730 +#include <media/v4l2-device.h>
3731 +#include <media/v4l2-ioctl.h>
3732 +#include <media/v4l2-event.h>
3733 +#include <media/v4l2-mem2mem.h>
3734 +
3735 +#include "rpivid.h"
3736 +#include "rpivid_video.h"
3737 +#include "rpivid_dec.h"
3738 +
3739 +#define RPIVID_DECODE_SRC      BIT(0)
3740 +#define RPIVID_DECODE_DST      BIT(1)
3741 +
3742 +#define RPIVID_MIN_WIDTH       16U
3743 +#define RPIVID_MIN_HEIGHT      16U
3744 +#define RPIVID_MAX_WIDTH       4096U
3745 +#define RPIVID_MAX_HEIGHT      4096U
3746 +
3747 +static inline struct rpivid_ctx *rpivid_file2ctx(struct file *file)
3748 +{
3749 +       return container_of(file->private_data, struct rpivid_ctx, fh);
3750 +}
3751 +
3752 +/* Keep x if it lies within [y, y * 2]; any other value is replaced by y */
3753 +static inline unsigned int constrain2x(unsigned int x, unsigned int y)
3754 +{
3755 +       if (x < y || x > y * 2)
3756 +               return y;
3757 +       return x;
3758 +}
3759 +
3760 +int rpivid_prepare_src_format(struct v4l2_pix_format *pix_fmt)
3761 +{
3762 +       if (pix_fmt->pixelformat != V4L2_PIX_FMT_HEVC_SLICE)
3763 +               return -EINVAL;
3764 +
3765 +       /* Zero bytes per line for encoded source. */
3766 +       pix_fmt->bytesperline = 0;
3767 +       /* Choose some minimum size since this can't be 0 */
3768 +       pix_fmt->sizeimage = max_t(u32, SZ_1K, pix_fmt->sizeimage);
3769 +       pix_fmt->field = V4L2_FIELD_NONE;
3770 +       return 0;
3771 +}
3772 +
3773 +int rpivid_prepare_dst_format(struct v4l2_pix_format *pix_fmt)
3774 +{
3775 +       unsigned int width = pix_fmt->width;
3776 +       unsigned int height = pix_fmt->height;
3777 +       unsigned int sizeimage = pix_fmt->sizeimage;
3778 +       unsigned int bytesperline = pix_fmt->bytesperline;
3779 +       /* Unknown pixel formats return -EINVAL and leave pix_fmt untouched */
3780 +       switch (pix_fmt->pixelformat) {
3781 +       /* For column formats set bytesperline to column height (stride2) */
3782 +       case V4L2_PIX_FMT_NV12_COL128:
3783 +               /* Width rounds up to columns */
3784 +               width = ALIGN(min(width, RPIVID_MAX_WIDTH), 128);
3785 +
3786 +               /* Clamped, 16 aligned height - keeps the sizes below in 32 bits */
3787 +               height = ALIGN(min(height, RPIVID_MAX_HEIGHT), 16);
3788 +               /* column height
3789 +                * Accept suggested shape if at least min & < 2 * min
3790 +                */
3791 +               bytesperline = constrain2x(bytesperline, height * 3 / 2);
3792 +
3793 +               /* image size
3794 +                * Again allow plausible variation in case added padding is
3795 +                * required
3796 +                */
3797 +               sizeimage = constrain2x(sizeimage, bytesperline * width);
3798 +               break;
3799 +
3800 +       case V4L2_PIX_FMT_NV12_10_COL128:
3801 +               /* width in pixels (3 pels = 4 bytes) rounded to 128 byte
3802 +                * columns
3803 +                */
3804 +               width = ALIGN(((min(width, RPIVID_MAX_WIDTH) + 2) / 3), 32) * 3;
3805 +
3806 +               /* Clamped, 16-aligned height. */
3807 +               height = ALIGN(min(height, RPIVID_MAX_HEIGHT), 16);
3808 +
3809 +               /* column height
3810 +                * Accept suggested shape if at least min & < 2 * min
3811 +                */
3812 +               bytesperline = constrain2x(bytesperline, height * 3 / 2);
3813 +
3814 +               /* image size
3815 +                * Again allow plausible variation in case added padding is
3816 +                * required
3817 +                */
3818 +               sizeimage = constrain2x(sizeimage,
3819 +                                       bytesperline * width * 4 / 3);
3820 +               break;
3821 +
3822 +       default:
3823 +               return -EINVAL;
3824 +       }
3825 +
3826 +       pix_fmt->width = width;
3827 +       pix_fmt->height = height;
3828 +
3829 +       pix_fmt->field = V4L2_FIELD_NONE;
3830 +       pix_fmt->bytesperline = bytesperline;
3831 +       pix_fmt->sizeimage = sizeimage;
3832 +       return 0;
3833 +}
3834 +
3835 +static int rpivid_querycap(struct file *file, void *priv,
3836 +                          struct v4l2_capability *cap)
3837 +{
3838 +       strscpy(cap->driver, RPIVID_NAME, sizeof(cap->driver));
3839 +       strscpy(cap->card, RPIVID_NAME, sizeof(cap->card));
3840 +       snprintf(cap->bus_info, sizeof(cap->bus_info),
3841 +                "platform:%s", RPIVID_NAME);
3842 +
3843 +       return 0;
3844 +}
3845 +
3846 +static int rpivid_enum_fmt_vid_out(struct file *file, void *priv,
3847 +                                  struct v4l2_fmtdesc *f)
3848 +{
3849 +       // Input formats
3850 +
3851 +       // H.265 Slice only currently
3852 +       if (f->index == 0) {
3853 +               f->pixelformat = V4L2_PIX_FMT_HEVC_SLICE;
3854 +               return 0;
3855 +       }
3856 +
3857 +       return -EINVAL;
3858 +}
3859 +
3860 +static int rpivid_hevc_validate_sps(const struct v4l2_ctrl_hevc_sps * const sps)
3861 +{
3862 +       const unsigned int ctb_log2_size_y =
3863 +                       sps->log2_min_luma_coding_block_size_minus3 + 3 +
3864 +                       sps->log2_diff_max_min_luma_coding_block_size;
3865 +       const unsigned int min_tb_log2_size_y =
3866 +                       sps->log2_min_luma_transform_block_size_minus2 + 2;
3867 +       const unsigned int max_tb_log2_size_y = min_tb_log2_size_y +
3868 +                       sps->log2_diff_max_min_luma_transform_block_size;
3869 +       /* Returns 1 if every check below passes, 0 as soon as one fails */
3870 +       /* Local limitations */
3871 +       if (sps->pic_width_in_luma_samples < 32 ||
3872 +           sps->pic_width_in_luma_samples > 4096)
3873 +               return 0;
3874 +       if (sps->pic_height_in_luma_samples < 32 ||
3875 +           sps->pic_height_in_luma_samples > 4096)
3876 +               return 0;
3877 +       if (!(sps->bit_depth_luma_minus8 == 0 ||
3878 +             sps->bit_depth_luma_minus8 == 2))
3879 +               return 0;
3880 +       if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
3881 +               return 0;
3882 +       if (sps->chroma_format_idc != 1)
3883 +               return 0;
3884 +
3885 +       /*  Limits from H.265 7.4.3.2.1 */
3886 +       if (sps->log2_max_pic_order_cnt_lsb_minus4 > 12)
3887 +               return 0;
3888 +       if (sps->sps_max_dec_pic_buffering_minus1 > 15)
3889 +               return 0;
3890 +       if (sps->sps_max_num_reorder_pics >
3891 +                               sps->sps_max_dec_pic_buffering_minus1)
3892 +               return 0;
3893 +       if (ctb_log2_size_y > 6)
3894 +               return 0;
3895 +       if (max_tb_log2_size_y > 5)
3896 +               return 0;
3897 +       if (max_tb_log2_size_y > ctb_log2_size_y)
3898 +               return 0;
3899 +       if (sps->max_transform_hierarchy_depth_inter >
3900 +                               (ctb_log2_size_y - min_tb_log2_size_y))
3901 +               return 0;
3902 +       if (sps->max_transform_hierarchy_depth_intra >
3903 +                               (ctb_log2_size_y - min_tb_log2_size_y))
3904 +               return 0;
3905 +       /* TODO: pcm fields are not checked yet */
3906 +       if (sps->num_short_term_ref_pic_sets > 64)
3907 +               return 0;
3908 +       if (sps->num_long_term_ref_pics_sps > 32)
3909 +               return 0;
3910 +       return 1;
3911 +}
3912 +
3913 +static inline int is_sps_set(const struct v4l2_ctrl_hevc_sps * const sps)
3914 +{
3915 +       return sps && sps->pic_width_in_luma_samples != 0;
3916 +}
3917 +
3918 +static u32 pixelformat_from_sps(const struct v4l2_ctrl_hevc_sps * const sps,
3919 +                               const int index)
3920 +{
3921 +       u32 pf = 0; /* 0 is returned when no format exists for this index */
3922 +
3923 +       // Use width 0 as a signifier of unsetness
3924 +       if (!is_sps_set(sps)) {
3925 +               /* Treat this as an error? For now return both */
3926 +               if (index == 0)
3927 +                       pf = V4L2_PIX_FMT_NV12_COL128;
3928 +               else if (index == 1)
3929 +                       pf = V4L2_PIX_FMT_NV12_10_COL128;
3930 +       } else if (index == 0 && rpivid_hevc_validate_sps(sps)) {
3931 +               if (sps->bit_depth_luma_minus8 == 0) /* 8-bit */
3932 +                       pf = V4L2_PIX_FMT_NV12_COL128;
3933 +               else if (sps->bit_depth_luma_minus8 == 2) /* 10-bit */
3934 +                       pf = V4L2_PIX_FMT_NV12_10_COL128;
3935 +       }
3936 +
3937 +       return pf;
3938 +}
3939 +
3940 +/* Default capture format from the SPS control; NULL SPS is taken as 0 x 0. */
3941 +static struct v4l2_pix_format
3942 +rpivid_hevc_default_dst_fmt(struct rpivid_ctx * const ctx)
3943 +{
3944 +       const struct v4l2_ctrl_hevc_sps * const sps =
3945 +               rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS);
3946 +       struct v4l2_pix_format pix_fmt = {
3947 +               .width = sps ? sps->pic_width_in_luma_samples : 0,
3948 +               .height = sps ? sps->pic_height_in_luma_samples : 0,
3949 +               .pixelformat = pixelformat_from_sps(sps, 0)
3950 +       };
3951 +       rpivid_prepare_dst_format(&pix_fmt);
3952 +       return pix_fmt;
3953 +}
3954 +
3955 +/* Capture fourcc at enumeration slot 'index' for the context's SPS. */
3956 +static u32 rpivid_hevc_get_dst_pixelformat(struct rpivid_ctx * const ctx,
3957 +                                          const int index)
3958 +{
3959 +       const struct v4l2_ctrl_hevc_sps * const cur_sps =
3960 +               rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS);
3961 +       return pixelformat_from_sps(cur_sps, index);
3962 +}
3963 +
3964 +/* VIDIOC_ENUM_FMT (capture): report the pixel format at f->index. */
3965 +static int rpivid_enum_fmt_vid_cap(struct file *file, void *priv,
3966 +                                  struct v4l2_fmtdesc *f)
3967 +{
3968 +       struct rpivid_ctx * const ctx = rpivid_file2ctx(file);
3969 +       const u32 fourcc = rpivid_hevc_get_dst_pixelformat(ctx, f->index);
3970 +
3971 +       /* A zero fourcc means there is no format at this index */
3972 +       if (!fourcc)
3973 +               return -EINVAL;
3974 +       f->pixelformat = fourcc;
3975 +       return 0;
3976 +}
3977 +
3978 +/* VIDIOC_G_FMT (capture): report dst_fmt, recomputing it first if unset. */
3979 +static int rpivid_g_fmt_vid_cap(struct file *file, void *priv,
3980 +                               struct v4l2_format *f)
3981 +{
3982 +       struct rpivid_ctx * const c = rpivid_file2ctx(file);
3983 +       if (c->dst_fmt_set == 0)
3984 +               c->dst_fmt = rpivid_hevc_default_dst_fmt(c);
3985 +       f->fmt.pix = c->dst_fmt;
3986 +       return 0;
3987 +}
3988 +
3989 +/* VIDIOC_G_FMT (output): report the stored source format. */
3990 +static int rpivid_g_fmt_vid_out(struct file *file, void *priv,
3991 +                               struct v4l2_format *f)
3992 +{
3993 +       const struct rpivid_ctx * const c = rpivid_file2ctx(file);
3994 +       f->fmt.pix = c->src_fmt;
3995 +       return 0;
3996 +}
3997 +/* Copy the four colorimetry fields of a pix format from s to d. */
3998 +static inline void copy_color(struct v4l2_pix_format *d,
3999 +                             const struct v4l2_pix_format *s)
4000 +{
4001 +       d->quantization = s->quantization;
4002 +       d->ycbcr_enc = s->ycbcr_enc;
4003 +       d->xfer_func = s->xfer_func;
4004 +       d->colorspace = s->colorspace;
4005 +}
4006 +
4007 +/* VIDIOC_TRY_FMT (capture): adjust the request to a format the SPS allows. */
4008 +static int rpivid_try_fmt_vid_cap(struct file *file, void *priv,
4009 +                                 struct v4l2_format *f)
4010 +{
4011 +       struct rpivid_ctx *ctx = rpivid_file2ctx(file);
4012 +       const struct v4l2_ctrl_hevc_sps * const sps =
4013 +               rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS);
4014 +       struct v4l2_pix_format * const pix = &f->fmt.pix;
4015 +       u32 fourcc;
4016 +       int idx = 0;
4017 +
4018 +       /* Reject buffer types other than V4L2_BUF_TYPE_VIDEO_CAPTURE */
4019 +       if (f->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
4020 +               return -EINVAL;
4021 +
4022 +       /* Scan the enumerable formats for the request; 0 ends the list */
4023 +       do {
4024 +               fourcc = pixelformat_from_sps(sps, idx++);
4025 +       } while (fourcc != 0 && fourcc != pix->pixelformat);
4026 +
4027 +       /* No match: fall back to the default (index 0), if there is one */
4028 +       if (fourcc == 0) {
4029 +               fourcc = pixelformat_from_sps(sps, 0);
4030 +               if (fourcc == 0)
4031 +                       return -EINVAL;
4032 +       }
4033 +
4034 +       /* Colourspace is not derivable here, so inherit it from the source */
4035 +       if (pix->colorspace == V4L2_COLORSPACE_DEFAULT)
4036 +               copy_color(pix, &ctx->src_fmt);
4037 +
4038 +       pix->pixelformat = fourcc;
4039 +       return rpivid_prepare_dst_format(pix);
4040 +}
4041 +
4042 +/* VIDIOC_TRY_FMT (output): on a rejected format substitute the default. */
4043 +static int rpivid_try_fmt_vid_out(struct file *file, void *priv,
4044 +                                 struct v4l2_format *f)
4045 +{
4046 +       struct v4l2_pix_format * const pix = &f->fmt.pix;
4047 +       if (f->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
4048 +               return -EINVAL;
4049 +       if (rpivid_prepare_src_format(pix) == 0)
4050 +               return 0;
4051 +       pix->pixelformat = RPIVID_SRC_PIXELFORMAT_DEFAULT;
4052 +       rpivid_prepare_src_format(pix);
4053 +       return 0;
4054 +}
4055 +
4056 +/* VIDIOC_S_FMT (capture): store the tried format unless the queue is busy. */
4057 +static int rpivid_s_fmt_vid_cap(struct file *file, void *priv,
4058 +                               struct v4l2_format *f)
4059 +{
4060 +       struct rpivid_ctx * const ctx = rpivid_file2ctx(file);
4061 +       struct vb2_queue * const q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
4062 +       int err;
4063 +
4064 +       /* Refuse while vb2 reports the queue as busy */
4065 +       if (vb2_is_busy(q))
4066 +               return -EBUSY;
4067 +       err = rpivid_try_fmt_vid_cap(file, priv, f);
4068 +       if (err)
4069 +               return err;
4070 +
4071 +       /* Record the format and flag it as explicitly set */
4072 +       ctx->dst_fmt_set = 1;
4073 +       ctx->dst_fmt = f->fmt.pix;
4074 +       return 0;
4075 +}
4076 +
4077 +/* VIDIOC_S_FMT (output): set the source format; capture's becomes unset. */
4078 +static int rpivid_s_fmt_vid_out(struct file *file, void *priv,
4079 +                               struct v4l2_format *f)
4080 +{
4081 +       struct rpivid_ctx * const ctx = rpivid_file2ctx(file);
4082 +       struct vb2_queue * const q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
4083 +       int err;
4084 +
4085 +       if (vb2_is_busy(q))
4086 +               return -EBUSY;
4087 +
4088 +       err = rpivid_try_fmt_vid_out(file, priv, f);
4089 +       if (err)
4090 +               return err;
4091 +
4092 +       /* A new source format invalidates any capture format set earlier */
4093 +       ctx->src_fmt = f->fmt.pix;
4094 +       ctx->dst_fmt_set = 0;
4095 +
4096 +       q->subsystem_flags |= VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF;
4097 +
4098 +       /* Propagate colorspace information to capture */
4099 +       copy_color(&ctx->dst_fmt, &ctx->src_fmt);
4100 +       return 0;
4101 +}
4102 +/* ioctl table: local format handlers plus generic v4l2-m2m/ctrl helpers. */
4103 +const struct v4l2_ioctl_ops rpivid_ioctl_ops = {
4104 +       .vidioc_querycap                = rpivid_querycap,
4105 +       /* Capture queue formats */
4106 +       .vidioc_enum_fmt_vid_cap        = rpivid_enum_fmt_vid_cap,
4107 +       .vidioc_g_fmt_vid_cap           = rpivid_g_fmt_vid_cap,
4108 +       .vidioc_try_fmt_vid_cap         = rpivid_try_fmt_vid_cap,
4109 +       .vidioc_s_fmt_vid_cap           = rpivid_s_fmt_vid_cap,
4110 +       /* Output queue formats */
4111 +       .vidioc_enum_fmt_vid_out        = rpivid_enum_fmt_vid_out,
4112 +       .vidioc_g_fmt_vid_out           = rpivid_g_fmt_vid_out,
4113 +       .vidioc_try_fmt_vid_out         = rpivid_try_fmt_vid_out,
4114 +       .vidioc_s_fmt_vid_out           = rpivid_s_fmt_vid_out,
4115 +       /* Buffer management, delegated to the v4l2-m2m helpers */
4116 +       .vidioc_reqbufs                 = v4l2_m2m_ioctl_reqbufs,
4117 +       .vidioc_querybuf                = v4l2_m2m_ioctl_querybuf,
4118 +       .vidioc_qbuf                    = v4l2_m2m_ioctl_qbuf,
4119 +       .vidioc_dqbuf                   = v4l2_m2m_ioctl_dqbuf,
4120 +       .vidioc_prepare_buf             = v4l2_m2m_ioctl_prepare_buf,
4121 +       .vidioc_create_bufs             = v4l2_m2m_ioctl_create_bufs,
4122 +       .vidioc_expbuf                  = v4l2_m2m_ioctl_expbuf,
4123 +       /* Streaming control */
4124 +       .vidioc_streamon                = v4l2_m2m_ioctl_streamon,
4125 +       .vidioc_streamoff               = v4l2_m2m_ioctl_streamoff,
4126 +       /* Decoder commands, using the stateless v4l2-m2m helpers */
4127 +       .vidioc_try_decoder_cmd         = v4l2_m2m_ioctl_stateless_try_decoder_cmd,
4128 +       .vidioc_decoder_cmd             = v4l2_m2m_ioctl_stateless_decoder_cmd,
4129 +       /* Event (un)subscription */
4130 +       .vidioc_subscribe_event         = v4l2_ctrl_subscribe_event,
4131 +       .vidioc_unsubscribe_event       = v4l2_event_unsubscribe,
4132 +};
4133 +
4134 +/*
4135 + * vb2 queue_setup: buffers have one plane, sized from the current format
4136 + * of the queue's direction (src_fmt for output, dst_fmt otherwise).
4137 + */
4138 +static int rpivid_queue_setup(struct vb2_queue *vq, unsigned int *nbufs,
4139 +                             unsigned int *nplanes, unsigned int sizes[],
4140 +                             struct device *alloc_devs[])
4141 +{
4142 +       struct rpivid_ctx *ctx = vb2_get_drv_priv(vq);
4143 +       const struct v4l2_pix_format * const fmt =
4144 +               V4L2_TYPE_IS_OUTPUT(vq->type) ? &ctx->src_fmt : &ctx->dst_fmt;
4145 +
4146 +       /* Caller-provided layout: only check that plane 0 is big enough */
4147 +       if (*nplanes)
4148 +               return sizes[0] < fmt->sizeimage ? -EINVAL : 0;
4149 +
4150 +       /* Otherwise report a single plane of sizeimage bytes */
4151 +       *nplanes = 1;
4152 +       sizes[0] = fmt->sizeimage;
4153 +
4154 +       return 0;
4155 +}
4156 +
4157 +/* Remove every buffer queued on vq's side and finish it with 'state'. */
4158 +static void rpivid_queue_cleanup(struct vb2_queue *vq, u32 state)
4159 +{
4160 +       struct rpivid_ctx *ctx = vb2_get_drv_priv(vq);
4161 +       const bool is_src = V4L2_TYPE_IS_OUTPUT(vq->type);
4162 +       struct vb2_v4l2_buffer *buf;
4163 +
4164 +       for (;;) {
4165 +               buf = is_src ? v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx) :
4166 +                              v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
4167 +               if (!buf)
4168 +                       break;
4169 +
4170 +               /* Complete the buffer's control request before the buffer */
4171 +               v4l2_ctrl_request_complete(buf->vb2_buf.req_obj.req,
4172 +                                          &ctx->hdl);
4173 +               v4l2_m2m_buf_done(buf, state);
4174 +       }
4175 +}
4176 +
4177 +/* vb2 buf_out_validate: always set the field to V4L2_FIELD_NONE. */
4178 +static int rpivid_buf_out_validate(struct vb2_buffer *vb)
4179 +{
4180 +       to_vb2_v4l2_buffer(vb)->field = V4L2_FIELD_NONE;
4181 +
4182 +       return 0;
4183 +}
4184 +
4185 +/*
4186 + * vb2 buf_prepare: check plane 0 can hold sizeimage, then set the payload.
4187 + */
4188 +static int rpivid_buf_prepare(struct vb2_buffer *vb)
4189 +{
4190 +       struct vb2_queue * const q = vb->vb2_queue;
4191 +       struct rpivid_ctx * const ctx = vb2_get_drv_priv(q);
4192 +       const struct v4l2_pix_format * const fmt =
4193 +               V4L2_TYPE_IS_OUTPUT(q->type) ? &ctx->src_fmt : &ctx->dst_fmt;
4194 +
4195 +       /* Reject buffers smaller than the current format's image size */
4196 +       if (vb2_plane_size(vb, 0) < fmt->sizeimage)
4197 +               return -EINVAL;
4198 +
4199 +       /* The payload is set to the full image size for either direction */
4200 +       vb2_set_plane_payload(vb, 0, fmt->sizeimage);
4201 +       return 0;
4202 +}
4203 +
4204 +/* vb2 start_streaming: run the decoder start op (output) and the clock. */
4205 +static int rpivid_start_streaming(struct vb2_queue *vq, unsigned int count)
4206 +{
4207 +       struct rpivid_ctx *ctx = vb2_get_drv_priv(vq);
4208 +       struct rpivid_dev *dev = ctx->dev;
4209 +       int ret = 0;
4210 +
4211 +       if (ctx->src_fmt.pixelformat != V4L2_PIX_FMT_HEVC_SLICE)
4212 +               ret = -EINVAL;
4213 +       else if (V4L2_TYPE_IS_OUTPUT(vq->type) && dev->dec_ops->start)
4214 +               ret = dev->dec_ops->start(ctx);
4215 +       if (ret)        /* must not be overwritten by the clock calls below */
4216 +               goto out;
4217 +
4218 +       ret = clk_set_rate(dev->clock, 500 * 1000 * 1000);
4219 +       if (ret) {
4220 +               dev_err(dev->dev, "Failed to set clock rate\n");
4221 +               goto out;
4222 +       }
4223 +       ret = clk_prepare_enable(dev->clock);
4224 +       if (ret)
4225 +               dev_err(dev->dev, "Failed to enable clock\n");
4226 +out:
4227 +       /* On any failure hand the queued buffers back, as vb2 requires */
4228 +       if (ret)
4229 +               rpivid_queue_cleanup(vq, VB2_BUF_STATE_QUEUED);
4230 +       return ret;
4231 +}
4232 +/* vb2 stop_streaming: stop decoding, flush buffers, release the clock. */
4233 +static void rpivid_stop_streaming(struct vb2_queue *vq)
4234 +{
4235 +       struct rpivid_ctx *ctx = vb2_get_drv_priv(vq);
4236 +       struct rpivid_dev *dev = ctx->dev;
4237 +
4238 +       if (V4L2_TYPE_IS_OUTPUT(vq->type) && dev->dec_ops->stop)
4239 +               dev->dec_ops->stop(ctx);
4240 +       /* Return whatever is still queued, marked as errored */
4241 +       rpivid_queue_cleanup(vq, VB2_BUF_STATE_ERROR);
4242 +       /* Pairs with clk_prepare_enable() in rpivid_start_streaming() */
4243 +       clk_disable_unprepare(dev->clock);
4244 +}
4245 +
4246 +/* vb2 buf_queue: pass the buffer on to v4l2_m2m_buf_queue(). */
4247 +static void rpivid_buf_queue(struct vb2_buffer *vb)
4248 +{
4249 +       struct rpivid_ctx * const ctx = vb2_get_drv_priv(vb->vb2_queue);
4250 +
4251 +       v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, to_vb2_v4l2_buffer(vb));
4252 +}
4253 +
4254 +/* vb2 buf_request_complete: complete the vb's request against ctx->hdl. */
4255 +static void rpivid_buf_request_complete(struct vb2_buffer *vb)
4256 +{
4257 +       struct rpivid_ctx * const owner = vb2_get_drv_priv(vb->vb2_queue);
4258 +       v4l2_ctrl_request_complete(vb->req_obj.req, &owner->hdl);
4259 +}
4260 +/* vb2 callbacks installed on both queues by rpivid_queue_init(). */
4261 +static const struct vb2_ops rpivid_qops = {
4262 +       .queue_setup            = rpivid_queue_setup,
4263 +       .buf_prepare            = rpivid_buf_prepare,
4264 +       .buf_queue              = rpivid_buf_queue,
4265 +       .buf_out_validate       = rpivid_buf_out_validate,
4266 +       .buf_request_complete   = rpivid_buf_request_complete,
4267 +       .start_streaming        = rpivid_start_streaming,
4268 +       .stop_streaming         = rpivid_stop_streaming,
4269 +       .wait_prepare           = vb2_ops_wait_prepare,
4270 +       .wait_finish            = vb2_ops_wait_finish,
4271 +};
4272 +/* Configure and initialise the source and destination vb2 queues of priv. */
4273 +int rpivid_queue_init(void *priv, struct vb2_queue *src_vq,
4274 +                     struct vb2_queue *dst_vq)
4275 +{
4276 +       struct rpivid_ctx *ctx = priv;
4277 +       int ret;
4278 +       /* Output (source) queue: requests are supported and required */
4279 +       src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
4280 +       src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
4281 +       src_vq->drv_priv = ctx;
4282 +       src_vq->buf_struct_size = sizeof(struct rpivid_buffer);
4283 +       src_vq->min_buffers_needed = 1;
4284 +       src_vq->ops = &rpivid_qops;
4285 +       src_vq->mem_ops = &vb2_dma_contig_memops;
4286 +       src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
4287 +       src_vq->lock = &ctx->dev->dev_mutex;
4288 +       src_vq->dev = ctx->dev->dev;
4289 +       src_vq->supports_requests = true;
4290 +       src_vq->requires_requests = true;
4291 +
4292 +       ret = vb2_queue_init(src_vq);
4293 +       if (ret)
4294 +               return ret;
4295 +       /* Capture (destination) queue: same setup, no request flags */
4296 +       dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
4297 +       dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
4298 +       dst_vq->drv_priv = ctx;
4299 +       dst_vq->buf_struct_size = sizeof(struct rpivid_buffer);
4300 +       dst_vq->min_buffers_needed = 1;
4301 +       dst_vq->ops = &rpivid_qops;
4302 +       dst_vq->mem_ops = &vb2_dma_contig_memops;
4303 +       dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
4304 +       dst_vq->lock = &ctx->dev->dev_mutex;
4305 +       dst_vq->dev = ctx->dev->dev;
4306 +
4307 +       return vb2_queue_init(dst_vq);
4308 +}
4309 --- /dev/null
4310 +++ b/drivers/staging/media/rpivid/rpivid_video.h
4311 @@ -0,0 +1,30 @@
4312 +/* SPDX-License-Identifier: GPL-2.0 */
4313 +/*
4314 + * Raspberry Pi HEVC driver
4315 + *
4316 + * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
4317 + *
4318 + * Based on the Cedrus VPU driver, that is:
4319 + *
4320 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
4321 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
4322 + * Copyright (C) 2018 Bootlin
4323 + */
4324 +
4325 +#ifndef _RPIVID_VIDEO_H_
4326 +#define _RPIVID_VIDEO_H_
4327 +/* NOTE(review): format descriptor; field semantics are not visible here. */
4328 +struct rpivid_format {
4329 +       u32             pixelformat; /* presumably a V4L2 fourcc - confirm */
4330 +       u32             directions;
4331 +       unsigned int    capabilities;
4332 +};
4333 +/* V4L2 ioctl handler table */
4334 +extern const struct v4l2_ioctl_ops rpivid_ioctl_ops;
4335 +/* Queue initialisation and source/destination format preparation */
4336 +int rpivid_queue_init(void *priv, struct vb2_queue *src_vq,
4337 +                     struct vb2_queue *dst_vq);
4338 +int rpivid_prepare_src_format(struct v4l2_pix_format *pix_fmt);
4339 +int rpivid_prepare_dst_format(struct v4l2_pix_format *pix_fmt);
4340 +
4341 +#endif