1 From 53b7fe2fdb7ff96f0d29105c9621c93cfe754c9e Mon Sep 17 00:00:00 2001
2 From: John Cox <jc@kynesim.co.uk>
3 Date: Thu, 5 Mar 2020 18:30:41 +0000
4 Subject: [PATCH] staging: media: Add Raspberry Pi V4L2 H265 decoder
6 This driver is for the HEVC/H265 decoder block on the Raspberry
7 Pi 4, and conforms to the V4L2 stateless decoder API.
9 Signed-off-by: John Cox <jc@kynesim.co.uk>
11 drivers/staging/media/Kconfig | 2 +
12 drivers/staging/media/Makefile | 1 +
13 drivers/staging/media/rpivid/Kconfig | 16 +
14 drivers/staging/media/rpivid/Makefile | 5 +
15 drivers/staging/media/rpivid/rpivid.c | 432 ++++
16 drivers/staging/media/rpivid/rpivid.h | 181 ++
17 drivers/staging/media/rpivid/rpivid_dec.c | 79 +
18 drivers/staging/media/rpivid/rpivid_dec.h | 19 +
19 drivers/staging/media/rpivid/rpivid_h265.c | 2275 +++++++++++++++++++
20 drivers/staging/media/rpivid/rpivid_hw.c | 321 +++
21 drivers/staging/media/rpivid/rpivid_hw.h | 300 +++
22 drivers/staging/media/rpivid/rpivid_video.c | 593 +++++
23 drivers/staging/media/rpivid/rpivid_video.h | 30 +
24 13 files changed, 4254 insertions(+)
25 create mode 100644 drivers/staging/media/rpivid/Kconfig
26 create mode 100644 drivers/staging/media/rpivid/Makefile
27 create mode 100644 drivers/staging/media/rpivid/rpivid.c
28 create mode 100644 drivers/staging/media/rpivid/rpivid.h
29 create mode 100644 drivers/staging/media/rpivid/rpivid_dec.c
30 create mode 100644 drivers/staging/media/rpivid/rpivid_dec.h
31 create mode 100644 drivers/staging/media/rpivid/rpivid_h265.c
32 create mode 100644 drivers/staging/media/rpivid/rpivid_hw.c
33 create mode 100644 drivers/staging/media/rpivid/rpivid_hw.h
34 create mode 100644 drivers/staging/media/rpivid/rpivid_video.c
35 create mode 100644 drivers/staging/media/rpivid/rpivid_video.h
37 --- a/drivers/staging/media/Kconfig
38 +++ b/drivers/staging/media/Kconfig
39 @@ -32,6 +32,8 @@ source "drivers/staging/media/omap4iss/K
41 source "drivers/staging/media/rkvdec/Kconfig"
43 +source "drivers/staging/media/rpivid/Kconfig"
45 source "drivers/staging/media/sunxi/Kconfig"
47 source "drivers/staging/media/tegra-vde/Kconfig"
48 --- a/drivers/staging/media/Makefile
49 +++ b/drivers/staging/media/Makefile
50 @@ -4,6 +4,7 @@ obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx/
51 obj-$(CONFIG_VIDEO_MESON_VDEC) += meson/vdec/
52 obj-$(CONFIG_VIDEO_OMAP4) += omap4iss/
53 obj-$(CONFIG_VIDEO_ROCKCHIP_VDEC) += rkvdec/
54 +obj-$(CONFIG_VIDEO_RPIVID) += rpivid/
55 obj-$(CONFIG_VIDEO_SUNXI) += sunxi/
56 obj-$(CONFIG_VIDEO_TEGRA) += tegra-video/
57 obj-$(CONFIG_TEGRA_VDE) += tegra-vde/
59 +++ b/drivers/staging/media/rpivid/Kconfig
61 +# SPDX-License-Identifier: GPL-2.0
64 + tristate "Rpi H265 driver"
65 + depends on VIDEO_DEV && VIDEO_V4L2
66 + depends on MEDIA_CONTROLLER
68 + depends on MEDIA_CONTROLLER_REQUEST_API
69 + select VIDEOBUF2_DMA_CONTIG
70 + select V4L2_MEM2MEM_DEV
72 + Support for the Rpi H265 h/w decoder.
74 + To compile this driver as a module, choose M here: the module
75 + will be called rpivid-hevc.
78 +++ b/drivers/staging/media/rpivid/Makefile
80 +# SPDX-License-Identifier: GPL-2.0
81 +obj-$(CONFIG_VIDEO_RPIVID) += rpivid-hevc.o
83 +rpivid-hevc-y = rpivid.o rpivid_video.o rpivid_dec.o \
84 + rpivid_hw.o rpivid_h265.o
86 +++ b/drivers/staging/media/rpivid/rpivid.c
88 +// SPDX-License-Identifier: GPL-2.0
90 + * Raspberry Pi HEVC driver
92 + * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
94 + * Based on the Cedrus VPU driver, that is:
96 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
97 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
98 + * Copyright (C) 2018 Bootlin
101 +#include <linux/platform_device.h>
102 +#include <linux/module.h>
103 +#include <linux/of.h>
105 +#include <media/v4l2-device.h>
106 +#include <media/v4l2-ioctl.h>
107 +#include <media/v4l2-ctrls.h>
108 +#include <media/v4l2-mem2mem.h>
111 +#include "rpivid_video.h"
112 +#include "rpivid_hw.h"
113 +#include "rpivid_dec.h"
116 + * Default /dev/videoN node number.
117 + * Deliberately avoid the very low numbers as these are often taken by webcams
118 + * etc, and simple apps tend to only go for /dev/video0.
120 +static int video_nr = 19;
121 +module_param(video_nr, int, 0644);
122 +MODULE_PARM_DESC(video_nr, "decoder video device number");
124 +static const struct rpivid_control rpivid_ctrls[] = {
127 + .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS,
133 + .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS,
139 + .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX,
145 + .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS,
151 + .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE,
152 + .max = V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED,
153 + .def = V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED,
159 + .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE,
160 + .max = V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE,
161 + .def = V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE,
167 +#define rpivid_ctrls_COUNT ARRAY_SIZE(rpivid_ctrls)
169 +void *rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id)
173 + for (i = 0; ctx->ctrls[i]; i++)
174 + if (ctx->ctrls[i]->id == id)
175 + return ctx->ctrls[i]->p_cur.p;
180 +static int rpivid_init_ctrls(struct rpivid_dev *dev, struct rpivid_ctx *ctx)
182 + struct v4l2_ctrl_handler *hdl = &ctx->hdl;
183 + struct v4l2_ctrl *ctrl;
184 + unsigned int ctrl_size;
187 + v4l2_ctrl_handler_init(hdl, rpivid_ctrls_COUNT);
189 + v4l2_err(&dev->v4l2_dev,
190 + "Failed to initialize control handler\n");
193 + /* One spare zeroed slot: rpivid_find_control_data() stops at NULL */
194 + ctrl_size = sizeof(ctrl) * (rpivid_ctrls_COUNT + 1);
196 + ctx->ctrls = kzalloc(ctrl_size, GFP_KERNEL);
200 + for (i = 0; i < rpivid_ctrls_COUNT; i++) {
201 + ctrl = v4l2_ctrl_new_custom(hdl, &rpivid_ctrls[i].cfg,
204 + v4l2_err(&dev->v4l2_dev,
205 + "Failed to create new custom control id=%#x\n",
206 + rpivid_ctrls[i].cfg.id);
208 + v4l2_ctrl_handler_free(hdl);
213 + ctx->ctrls[i] = ctrl;
216 + ctx->fh.ctrl_handler = hdl;
217 + v4l2_ctrl_handler_setup(hdl);
222 +static int rpivid_request_validate(struct media_request *req)
224 + struct media_request_object *obj;
225 + struct v4l2_ctrl_handler *parent_hdl, *hdl;
226 + struct rpivid_ctx *ctx = NULL;
227 + struct v4l2_ctrl *ctrl_test;
228 + unsigned int count;
231 + list_for_each_entry(obj, &req->objects, list) {
232 + struct vb2_buffer *vb;
234 + if (vb2_request_object_is_buffer(obj)) {
235 + vb = container_of(obj, struct vb2_buffer, req_obj);
236 + ctx = vb2_get_drv_priv(vb->vb2_queue);
245 + count = vb2_request_buffer_cnt(req);
247 + v4l2_info(&ctx->dev->v4l2_dev,
248 + "No buffer was provided with the request\n");
250 + } else if (count > 1) {
251 + v4l2_info(&ctx->dev->v4l2_dev,
252 + "More than one buffer was provided with the request\n");
256 + parent_hdl = &ctx->hdl;
258 + hdl = v4l2_ctrl_request_hdl_find(req, parent_hdl);
260 + v4l2_info(&ctx->dev->v4l2_dev, "Missing codec control(s)\n");
264 + for (i = 0; i < rpivid_ctrls_COUNT; i++) {
265 + if (!rpivid_ctrls[i].required)
269 + v4l2_ctrl_request_hdl_ctrl_find(hdl,
270 + rpivid_ctrls[i].cfg.id);
272 + v4l2_info(&ctx->dev->v4l2_dev,
273 + "Missing required codec control\n");
278 + v4l2_ctrl_request_hdl_put(hdl);
280 + return vb2_request_validate(req);
283 +static int rpivid_open(struct file *file)
285 + struct rpivid_dev *dev = video_drvdata(file);
286 + struct rpivid_ctx *ctx = NULL;
289 + if (mutex_lock_interruptible(&dev->dev_mutex))
290 + return -ERESTARTSYS;
292 + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
294 + mutex_unlock(&dev->dev_mutex);
298 + v4l2_fh_init(&ctx->fh, video_devdata(file));
299 + file->private_data = &ctx->fh;
302 + ret = rpivid_init_ctrls(dev, ctx);
306 + ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, ctx,
307 + &rpivid_queue_init);
308 + if (IS_ERR(ctx->fh.m2m_ctx)) {
309 + ret = PTR_ERR(ctx->fh.m2m_ctx);
313 + /* The only format info we can guess at open time is the H265 src;
314 + * everything else needs more information than we have yet.
316 + ctx->src_fmt.pixelformat = RPIVID_SRC_PIXELFORMAT_DEFAULT;
317 + rpivid_prepare_src_format(&ctx->src_fmt);
319 + v4l2_fh_add(&ctx->fh);
321 + mutex_unlock(&dev->dev_mutex);
326 + v4l2_ctrl_handler_free(&ctx->hdl);
329 + mutex_unlock(&dev->dev_mutex);
334 +static int rpivid_release(struct file *file)
336 + struct rpivid_dev *dev = video_drvdata(file);
337 + struct rpivid_ctx *ctx = container_of(file->private_data,
338 + struct rpivid_ctx, fh);
340 + mutex_lock(&dev->dev_mutex);
342 + v4l2_fh_del(&ctx->fh);
343 + v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
345 + v4l2_ctrl_handler_free(&ctx->hdl);
348 + v4l2_fh_exit(&ctx->fh);
352 + mutex_unlock(&dev->dev_mutex);
357 +static const struct v4l2_file_operations rpivid_fops = {
358 + .owner = THIS_MODULE,
359 + .open = rpivid_open,
360 + .release = rpivid_release,
361 + .poll = v4l2_m2m_fop_poll,
362 + .unlocked_ioctl = video_ioctl2,
363 + .mmap = v4l2_m2m_fop_mmap,
366 +static const struct video_device rpivid_video_device = {
367 + .name = RPIVID_NAME,
368 + .vfl_dir = VFL_DIR_M2M,
369 + .fops = &rpivid_fops,
370 + .ioctl_ops = &rpivid_ioctl_ops,
372 + .release = video_device_release_empty,
373 + .device_caps = V4L2_CAP_VIDEO_M2M | V4L2_CAP_STREAMING,
376 +static const struct v4l2_m2m_ops rpivid_m2m_ops = {
377 + .device_run = rpivid_device_run,
380 +static const struct media_device_ops rpivid_m2m_media_ops = {
381 + .req_validate = rpivid_request_validate,
382 + .req_queue = v4l2_m2m_request_queue,
385 +static int rpivid_probe(struct platform_device *pdev)
387 + struct rpivid_dev *dev;
388 + struct video_device *vfd;
391 + dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
395 + dev->vfd = rpivid_video_device;
396 + dev->dev = &pdev->dev;
400 + ret = rpivid_hw_probe(dev);
402 + dev_err(&pdev->dev, "Failed to probe hardware\n");
406 + dev->dec_ops = &rpivid_dec_ops_h265;
408 + mutex_init(&dev->dev_mutex);
410 + ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
412 + dev_err(&pdev->dev, "Failed to register V4L2 device\n");
417 + vfd->lock = &dev->dev_mutex;
418 + vfd->v4l2_dev = &dev->v4l2_dev;
420 + snprintf(vfd->name, sizeof(vfd->name), "%s", rpivid_video_device.name);
421 + video_set_drvdata(vfd, dev);
423 + dev->m2m_dev = v4l2_m2m_init(&rpivid_m2m_ops);
424 + if (IS_ERR(dev->m2m_dev)) {
425 + v4l2_err(&dev->v4l2_dev,
426 + "Failed to initialize V4L2 M2M device\n");
427 + ret = PTR_ERR(dev->m2m_dev);
432 + dev->mdev.dev = &pdev->dev;
433 + strscpy(dev->mdev.model, RPIVID_NAME, sizeof(dev->mdev.model));
434 + strscpy(dev->mdev.bus_info, "platform:" RPIVID_NAME,
435 + sizeof(dev->mdev.bus_info));
437 + media_device_init(&dev->mdev);
438 + dev->mdev.ops = &rpivid_m2m_media_ops;
439 + dev->v4l2_dev.mdev = &dev->mdev;
441 + ret = video_register_device(vfd, VFL_TYPE_VIDEO, video_nr);
443 + v4l2_err(&dev->v4l2_dev, "Failed to register video device\n");
447 + v4l2_info(&dev->v4l2_dev,
448 + "Device registered as /dev/video%d\n", vfd->num);
450 + ret = v4l2_m2m_register_media_controller(dev->m2m_dev, vfd,
451 + MEDIA_ENT_F_PROC_VIDEO_DECODER);
453 + v4l2_err(&dev->v4l2_dev,
454 + "Failed to initialize V4L2 M2M media controller\n");
458 + ret = media_device_register(&dev->mdev);
460 + v4l2_err(&dev->v4l2_dev, "Failed to register media device\n");
464 + platform_set_drvdata(pdev, dev);
469 + v4l2_m2m_unregister_media_controller(dev->m2m_dev);
471 + video_unregister_device(&dev->vfd);
473 + v4l2_m2m_release(dev->m2m_dev);
475 + v4l2_device_unregister(&dev->v4l2_dev);
480 +static int rpivid_remove(struct platform_device *pdev)
482 + struct rpivid_dev *dev = platform_get_drvdata(pdev);
484 + if (media_devnode_is_registered(dev->mdev.devnode)) {
485 + media_device_unregister(&dev->mdev);
486 + v4l2_m2m_unregister_media_controller(dev->m2m_dev);
487 + media_device_cleanup(&dev->mdev);
490 + v4l2_m2m_release(dev->m2m_dev);
491 + video_unregister_device(&dev->vfd);
492 + v4l2_device_unregister(&dev->v4l2_dev);
494 + rpivid_hw_remove(dev);
499 +static const struct of_device_id rpivid_dt_match[] = {
501 + .compatible = "raspberrypi,rpivid-vid-decoder",
505 +MODULE_DEVICE_TABLE(of, rpivid_dt_match);
507 +static struct platform_driver rpivid_driver = {
508 + .probe = rpivid_probe,
509 + .remove = rpivid_remove,
511 + .name = RPIVID_NAME,
512 + .of_match_table = of_match_ptr(rpivid_dt_match),
515 +module_platform_driver(rpivid_driver);
517 +MODULE_LICENSE("GPL v2");
518 +MODULE_AUTHOR("John Cox <jc@kynesim.co.uk>");
519 +MODULE_DESCRIPTION("Raspberry Pi HEVC V4L2 driver");
521 +++ b/drivers/staging/media/rpivid/rpivid.h
523 +/* SPDX-License-Identifier: GPL-2.0 */
525 + * Raspberry Pi HEVC driver
527 + * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
529 + * Based on the Cedrus VPU driver, that is:
531 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
532 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
533 + * Copyright (C) 2018 Bootlin
539 +#include <linux/clk.h>
540 +#include <linux/platform_device.h>
541 +#include <media/v4l2-ctrls.h>
542 +#include <media/v4l2-device.h>
543 +#include <media/v4l2-mem2mem.h>
544 +#include <media/videobuf2-v4l2.h>
545 +#include <media/videobuf2-dma-contig.h>
547 +#define OPT_DEBUG_POLL_IRQ 0
549 +#define RPIVID_NAME "rpivid"
551 +#define RPIVID_CAPABILITY_UNTILED BIT(0)
552 +#define RPIVID_CAPABILITY_H265_DEC BIT(1)
554 +#define RPIVID_QUIRK_NO_DMA_OFFSET BIT(0)
556 +#define RPIVID_SRC_PIXELFORMAT_DEFAULT V4L2_PIX_FMT_HEVC_SLICE
558 +enum rpivid_irq_status {
564 +struct rpivid_control {
565 + struct v4l2_ctrl_config cfg;
566 + unsigned char required:1;
569 +struct rpivid_h265_run {
570 + const struct v4l2_ctrl_hevc_sps *sps;
571 + const struct v4l2_ctrl_hevc_pps *pps;
572 + const struct v4l2_ctrl_hevc_slice_params *slice_params;
573 + const struct v4l2_ctrl_hevc_scaling_matrix *scaling_matrix;
577 + struct vb2_v4l2_buffer *src;
578 + struct vb2_v4l2_buffer *dst;
580 + struct rpivid_h265_run h265;
583 +struct rpivid_buffer {
584 + struct v4l2_m2m_buffer m2m_buf;
587 +struct rpivid_dec_state;
588 +struct rpivid_dec_env;
589 +#define RPIVID_DEC_ENV_COUNT 3
591 +struct rpivid_gptr {
595 + unsigned long attrs;
599 +typedef void (*rpivid_irq_callback)(struct rpivid_dev *dev, void *ctx);
601 +struct rpivid_q_aux;
602 +#define RPIVID_AUX_ENT_COUNT VB2_MAX_FRAME
604 +#define RPIVID_P2BUF_COUNT 2
608 + struct rpivid_dev *dev;
610 + struct v4l2_pix_format src_fmt;
611 + struct v4l2_pix_format dst_fmt;
614 + struct v4l2_ctrl_handler hdl;
615 + struct v4l2_ctrl **ctrls;
617 + /* Decode state - kept per context even though the API is "stateless" */
618 + /* state contains stuff that is only needed in phase0;
619 + * it could be held in dec_env but that would be wasteful.
621 + struct rpivid_dec_state *state;
622 + struct rpivid_dec_env *dec0;
624 + /* Spinlock protecting dec_free */
625 + spinlock_t dec_lock;
626 + struct rpivid_dec_env *dec_free;
628 + struct rpivid_dec_env *dec_pool;
630 + /* Some of these should be in dev */
631 + struct rpivid_gptr bitbufs[1]; /* Will be 2 */
632 + struct rpivid_gptr cmdbufs[1]; /* Will be 2 */
633 + unsigned int p2idx;
635 + struct rpivid_gptr pu_bufs[RPIVID_P2BUF_COUNT];
636 + struct rpivid_gptr coeff_bufs[RPIVID_P2BUF_COUNT];
638 + /* Spinlock protecting aux_free */
639 + spinlock_t aux_lock;
640 + struct rpivid_q_aux *aux_free;
642 + struct rpivid_q_aux *aux_ents[RPIVID_AUX_ENT_COUNT];
644 + unsigned int colmv_stride;
645 + unsigned int colmv_picsize;
648 +struct rpivid_dec_ops {
649 + void (*setup)(struct rpivid_ctx *ctx, struct rpivid_run *run);
650 + int (*start)(struct rpivid_ctx *ctx);
651 + void (*stop)(struct rpivid_ctx *ctx);
652 + void (*trigger)(struct rpivid_ctx *ctx);
655 +struct rpivid_variant {
656 + unsigned int capabilities;
657 + unsigned int quirks;
658 + unsigned int mod_rate;
661 +struct rpivid_hw_irq_ent;
663 +struct rpivid_hw_irq_ctrl {
664 + /* Spinlock protecting claim and tail */
666 + struct rpivid_hw_irq_ent *claim;
667 + struct rpivid_hw_irq_ent *tail;
669 + /* Ent for pending irq - also prevents sched */
670 + struct rpivid_hw_irq_ent *irq;
671 + /* Non-zero => do not start a new job - outer layer sched pending */
673 + /* Thread CB requested */
678 + struct v4l2_device v4l2_dev;
679 + struct video_device vfd;
680 + struct media_device mdev;
681 + struct media_pad pad[2];
682 + struct platform_device *pdev;
683 + struct device *dev;
684 + struct v4l2_m2m_dev *m2m_dev;
685 + struct rpivid_dec_ops *dec_ops;
687 + /* Device file mutex */
688 + struct mutex dev_mutex;
690 + void __iomem *base_irq;
691 + void __iomem *base_h265;
695 + struct rpivid_hw_irq_ctrl ic_active1;
696 + struct rpivid_hw_irq_ctrl ic_active2;
699 +extern struct rpivid_dec_ops rpivid_dec_ops_h265;
701 +void *rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id);
705 +++ b/drivers/staging/media/rpivid/rpivid_dec.c
707 +// SPDX-License-Identifier: GPL-2.0
709 + * Raspberry Pi HEVC driver
711 + * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
713 + * Based on the Cedrus VPU driver, that is:
715 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
716 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
717 + * Copyright (C) 2018 Bootlin
720 +#include <media/v4l2-device.h>
721 +#include <media/v4l2-ioctl.h>
722 +#include <media/v4l2-event.h>
723 +#include <media/v4l2-mem2mem.h>
726 +#include "rpivid_dec.h"
728 +void rpivid_device_run(void *priv)
730 + struct rpivid_ctx *ctx = priv;
731 + struct rpivid_dev *dev = ctx->dev;
732 + struct rpivid_run run = {};
733 + struct media_request *src_req;
735 + run.src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
736 + run.dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
738 + if (!run.src || !run.dst) {
739 + v4l2_err(&dev->v4l2_dev, "%s: Missing buffer: src=%p, dst=%p\n",
740 + __func__, run.src, run.dst);
741 + /* We are stuffed - this probably won't dig us out of our
742 + * current situation but it is better than nothing
744 + v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
745 + VB2_BUF_STATE_ERROR);
749 + /* Apply request(s) controls if needed. */
750 + src_req = run.src->vb2_buf.req_obj.req;
753 + v4l2_ctrl_request_setup(src_req, &ctx->hdl);
755 + switch (ctx->src_fmt.pixelformat) {
756 + case V4L2_PIX_FMT_HEVC_SLICE:
758 + rpivid_find_control_data(ctx,
759 + V4L2_CID_MPEG_VIDEO_HEVC_SPS);
761 + rpivid_find_control_data(ctx,
762 + V4L2_CID_MPEG_VIDEO_HEVC_PPS);
763 + run.h265.slice_params =
764 + rpivid_find_control_data(ctx,
765 + V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS);
766 + run.h265.scaling_matrix =
767 + rpivid_find_control_data(ctx,
768 + V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX);
775 + v4l2_m2m_buf_copy_metadata(run.src, run.dst, true);
777 + dev->dec_ops->setup(ctx, &run);
779 + /* Complete request(s) controls if needed. */
782 + v4l2_ctrl_request_complete(src_req, &ctx->hdl);
784 + dev->dec_ops->trigger(ctx);
787 +++ b/drivers/staging/media/rpivid/rpivid_dec.h
789 +/* SPDX-License-Identifier: GPL-2.0 */
791 + * Raspberry Pi HEVC driver
793 + * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
795 + * Based on the Cedrus VPU driver, that is:
797 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
798 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
799 + * Copyright (C) 2018 Bootlin
802 +#ifndef _RPIVID_DEC_H_
803 +#define _RPIVID_DEC_H_
805 +void rpivid_device_run(void *priv);
809 +++ b/drivers/staging/media/rpivid/rpivid_h265.c
811 +// SPDX-License-Identifier: GPL-2.0-or-later
813 + * Raspberry Pi HEVC driver
815 + * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
817 + * Based on the Cedrus VPU driver, that is:
819 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
820 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
821 + * Copyright (C) 2018 Bootlin
824 +#include <linux/delay.h>
825 +#include <linux/types.h>
827 +#include <media/videobuf2-dma-contig.h>
830 +#include "rpivid_hw.h"
832 +#define DEBUG_TRACE_P1_CMD 0
833 +#define DEBUG_TRACE_EXECUTION 0
835 +#if DEBUG_TRACE_EXECUTION
836 +#define xtrace_in(dev_, de_)\
837 + v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: in\n", __func__,\
838 + (de_) == NULL ? -1 : (de_)->decode_order)
839 +#define xtrace_ok(dev_, de_)\
840 + v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: ok\n", __func__,\
841 + (de_) == NULL ? -1 : (de_)->decode_order)
842 +#define xtrace_fin(dev_, de_)\
843 + v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: finish\n", __func__,\
844 + (de_) == NULL ? -1 : (de_)->decode_order)
845 +#define xtrace_fail(dev_, de_)\
846 + v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: FAIL\n", __func__,\
847 + (de_) == NULL ? -1 : (de_)->decode_order)
849 +#define xtrace_in(dev_, de_)
850 +#define xtrace_ok(dev_, de_)
851 +#define xtrace_fin(dev_, de_)
852 +#define xtrace_fail(dev_, de_)
855 +enum hevc_slice_type {
861 +enum hevc_layer { L0 = 0, L1 = 1 };
863 +static int gptr_alloc(struct rpivid_dev *const dev, struct rpivid_gptr *gptr,
864 + size_t size, unsigned long attrs)
867 + gptr->attrs = attrs;
869 + gptr->ptr = dma_alloc_attrs(dev->dev, gptr->size, &gptr->addr,
870 + GFP_KERNEL, gptr->attrs);
871 + return !gptr->ptr ? -ENOMEM : 0;
874 +static void gptr_free(struct rpivid_dev *const dev,
875 + struct rpivid_gptr *const gptr)
878 + dma_free_attrs(dev->dev, gptr->size, gptr->ptr, gptr->addr,
886 +/* Realloc but do not copy */
887 +static int gptr_realloc_new(struct rpivid_dev * const dev,
888 + struct rpivid_gptr * const gptr, size_t size)
890 + if (size == gptr->size)
894 + dma_free_attrs(dev->dev, gptr->size, gptr->ptr,
895 + gptr->addr, gptr->attrs);
899 + gptr->ptr = dma_alloc_attrs(dev->dev, gptr->size,
900 + &gptr->addr, GFP_KERNEL, gptr->attrs);
901 + return gptr->ptr ? 0 : -ENOMEM;
904 +/* floor(log2(x)) */
905 +static unsigned int log2_size(size_t x)
907 + unsigned int n = 0;
925 + return (x & ~1) ? n + 1 : n;
928 +static size_t round_up_size(const size_t x)
930 + /* Admit no size < 256 */
931 + const unsigned int n = x < 256 ? 8 : log2_size(x) - 1;
933 + return x >= (3 << n) ? 4 << n : (3 << n);
936 +static size_t next_size(const size_t x)
938 + return round_up_size(x + 1);
941 +#define NUM_SCALING_FACTORS 4064 /* Not a typo = 0xbe0 + 0x400 */
943 +#define AXI_BASE64 0
945 +#define PROB_BACKUP ((20 << 12) + (20 << 6) + (0 << 0))
946 +#define PROB_RELOAD ((20 << 12) + (20 << 0) + (0 << 6))
948 +#define HEVC_MAX_REFS V4L2_HEVC_DPB_ENTRIES_NUM_MAX
950 +//////////////////////////////////////////////////////////////////////////////
957 +struct rpivid_q_aux {
958 + unsigned int refcount;
959 + unsigned int q_index;
960 + struct rpivid_q_aux *next;
961 + struct rpivid_gptr col;
964 +//////////////////////////////////////////////////////////////////////////////
966 +enum rpivid_decode_state {
967 + RPIVID_DECODE_SLICE_START,
968 + RPIVID_DECODE_SLICE_CONTINUE,
969 + RPIVID_DECODE_ERROR_CONTINUE,
970 + RPIVID_DECODE_ERROR_DONE,
971 + RPIVID_DECODE_PHASE1,
975 +struct rpivid_dec_env {
976 + struct rpivid_ctx *ctx;
977 + struct rpivid_dec_env *next;
979 + enum rpivid_decode_state state;
980 + unsigned int decode_order;
981 + int p1_status; /* P1 status - what to realloc */
983 + struct rpivid_dec_env *phase_wait_q_next;
985 + struct rpi_cmd *cmd_fifo;
986 + unsigned int cmd_len, cmd_max;
987 + unsigned int num_slice_msgs;
988 + unsigned int pic_width_in_ctbs_y;
989 + unsigned int pic_height_in_ctbs_y;
990 + unsigned int dpbno_col;
991 + u32 reg_slicestart;
992 + int collocated_from_l0_flag;
993 + unsigned int wpp_entry_x;
994 + unsigned int wpp_entry_y;
1000 + struct vb2_v4l2_buffer *frame_buf; // Detached dest buffer
1001 + unsigned int frame_c_offset;
1002 + unsigned int frame_stride;
1003 + dma_addr_t frame_addr;
1004 + dma_addr_t ref_addrs[16];
1005 + struct rpivid_q_aux *frame_aux;
1006 + struct rpivid_q_aux *col_aux;
1008 + dma_addr_t pu_base_vc;
1009 + dma_addr_t coeff_base_vc;
1013 + struct rpivid_gptr *bit_copy_gptr;
1014 + size_t bit_copy_len;
1015 + struct rpivid_gptr *cmd_copy_gptr;
1017 + u16 slice_msgs[2 * HEVC_MAX_REFS * 8 + 3];
1018 + u8 scaling_factors[NUM_SCALING_FACTORS];
1020 + struct rpivid_hw_irq_ent irq_ent;
1023 +#define member_size(type, member) sizeof(((type *)0)->member)
1025 +struct rpivid_dec_state {
1026 + struct v4l2_ctrl_hevc_sps sps;
1027 + struct v4l2_ctrl_hevc_pps pps;
1029 + // Helper vars & tables derived from sps/pps
1030 + unsigned int log2_ctb_size; /* log2 width of a CTB */
1031 + unsigned int ctb_width; /* Width in CTBs */
1032 + unsigned int ctb_height; /* Height in CTBs */
1033 + unsigned int ctb_size; /* Pic area in CTBs */
1034 + unsigned int num_tile_columns;
1035 + unsigned int num_tile_rows;
1036 + u8 column_width[member_size(struct v4l2_ctrl_hevc_pps,
1037 + column_width_minus1)];
1038 + u8 row_height[member_size(struct v4l2_ctrl_hevc_pps,
1039 + row_height_minus1)];
1043 + int *ctb_addr_rs_to_ts;
1044 + int *ctb_addr_ts_to_rs;
1047 + // Aux storage for DPB
1049 + struct rpivid_q_aux *ref_aux[HEVC_MAX_REFS];
1050 + struct rpivid_q_aux *frame_aux;
1053 + unsigned int slice_idx;
1055 + bool slice_temporal_mvp; /* Slice flag but constant for frame */
1057 + // Temp vars per run - don't actually need to persist
1059 + dma_addr_t src_addr;
1060 + const struct v4l2_ctrl_hevc_slice_params *sh;
1061 + unsigned int nb_refs[2];
1062 + unsigned int slice_qp;
1063 + unsigned int max_num_merge_cand; // 0 if I-slice
1064 + bool dependent_slice_segment_flag;
1067 +static inline int clip_int(const int x, const int lo, const int hi)
1069 + return x < lo ? lo : x > hi ? hi : x;
1072 +//////////////////////////////////////////////////////////////////////////////
1073 +// Phase 1 command and bit FIFOs
1075 +#if DEBUG_TRACE_P1_CMD
1079 +// ???? u16 addr - put in u32
1080 +static int p1_apb_write(struct rpivid_dec_env *const de, const u16 addr,
1083 + if (de->cmd_len == de->cmd_max)
1085 + krealloc(de->cmd_fifo,
1086 + (de->cmd_max *= 2) * sizeof(struct rpi_cmd),
1088 + de->cmd_fifo[de->cmd_len].addr = addr;
1089 + de->cmd_fifo[de->cmd_len].data = data;
1091 +#if DEBUG_TRACE_P1_CMD
1092 + if (++p1_z < 256) {
1093 + v4l2_info(&de->ctx->dev->v4l2_dev, "[%02x] %x %x\n",
1094 + de->cmd_len, addr, data);
1098 + return de->cmd_len++;
1101 +static int ctb_to_tile(unsigned int ctb, unsigned int *bd, int num)
1105 + for (i = 1; i < num && ctb >= bd[i]; i++)
1106 + ; // bd[] has num+1 elements; bd[0]=0; never scan past the last tile
1110 +static int ctb_to_slice_w_h(unsigned int ctb, int ctb_size, int width,
1111 + unsigned int *bd, int num)
1113 + if (ctb < bd[num - 1])
1115 + else if (width % ctb_size)
1116 + return width % ctb_size;
1121 +static void aux_q_free(struct rpivid_ctx *const ctx,
1122 + struct rpivid_q_aux *const aq)
1124 + struct rpivid_dev *const dev = ctx->dev;
1126 + gptr_free(dev, &aq->col);
1130 +static struct rpivid_q_aux *aux_q_alloc(struct rpivid_ctx *const ctx)
1132 + struct rpivid_dev *const dev = ctx->dev;
1133 + struct rpivid_q_aux *const aq = kzalloc(sizeof(*aq), GFP_KERNEL);
1139 + if (gptr_alloc(dev, &aq->col, ctx->colmv_picsize,
1140 + DMA_ATTR_FORCE_CONTIGUOUS | DMA_ATTR_NO_KERNEL_MAPPING))
1150 +static struct rpivid_q_aux *aux_q_new(struct rpivid_ctx *const ctx,
1151 + const unsigned int q_index)
1153 + struct rpivid_q_aux *aq;
1154 + unsigned long lockflags;
1156 + spin_lock_irqsave(&ctx->aux_lock, lockflags);
1157 + aq = ctx->aux_free;
1159 + ctx->aux_free = aq->next;
1163 + spin_unlock_irqrestore(&ctx->aux_lock, lockflags);
1166 + aq = aux_q_alloc(ctx);
1171 + aq->q_index = q_index;
1172 + ctx->aux_ents[q_index] = aq;
1176 +static struct rpivid_q_aux *aux_q_ref(struct rpivid_ctx *const ctx,
1177 + struct rpivid_q_aux *const aq)
1180 + unsigned long lockflags;
1182 + spin_lock_irqsave(&ctx->aux_lock, lockflags);
1186 + spin_unlock_irqrestore(&ctx->aux_lock, lockflags);
1191 +static void aux_q_release(struct rpivid_ctx *const ctx,
1192 + struct rpivid_q_aux **const paq)
1194 + struct rpivid_q_aux *const aq = *paq;
1198 + unsigned long lockflags;
1200 + spin_lock_irqsave(&ctx->aux_lock, lockflags);
1202 + if (--aq->refcount == 0) {
1203 + aq->next = ctx->aux_free;
1204 + ctx->aux_free = aq;
1205 + ctx->aux_ents[aq->q_index] = NULL;
1208 + spin_unlock_irqrestore(&ctx->aux_lock, lockflags);
1212 +static void aux_q_init(struct rpivid_ctx *const ctx)
1214 + spin_lock_init(&ctx->aux_lock);
1215 + ctx->aux_free = NULL;
1218 +static void aux_q_uninit(struct rpivid_ctx *const ctx)
1220 + struct rpivid_q_aux *aq;
1222 + ctx->colmv_picsize = 0;
1223 + ctx->colmv_stride = 0;
1224 + while ((aq = ctx->aux_free) != NULL) {
1225 + ctx->aux_free = aq->next;
1226 + aux_q_free(ctx, aq);
1230 +//////////////////////////////////////////////////////////////////////////////
1233 + * Initialisation process for context variables (CABAC init)
1234 + * see H.265 9.3.2.2
1236 + * N.B. If comparing with FFmpeg, note that this h/w uses slightly different
1237 + * offsets from FFmpeg's array.
1240 +/* Actual number of values */
1241 +#define RPI_PROB_VALS 154U
1242 +/* Rounded up as we copy words */
1243 +#define RPI_PROB_ARRAY_SIZE ((154 + 3) & ~3)
1245 +/* Initialiser values - see tables H.265 9-4 through 9-42 */
1246 +static const u8 prob_init[3][156] = {
1248 + 153, 200, 139, 141, 157, 154, 154, 154, 154, 154, 184, 154, 154,
1249 + 154, 184, 63, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154,
1250 + 154, 154, 154, 153, 138, 138, 111, 141, 94, 138, 182, 154, 154,
1251 + 154, 140, 92, 137, 138, 140, 152, 138, 139, 153, 74, 149, 92,
1252 + 139, 107, 122, 152, 140, 179, 166, 182, 140, 227, 122, 197, 110,
1253 + 110, 124, 125, 140, 153, 125, 127, 140, 109, 111, 143, 127, 111,
1254 + 79, 108, 123, 63, 110, 110, 124, 125, 140, 153, 125, 127, 140,
1255 + 109, 111, 143, 127, 111, 79, 108, 123, 63, 91, 171, 134, 141,
1256 + 138, 153, 136, 167, 152, 152, 139, 139, 111, 111, 125, 110, 110,
1257 + 94, 124, 108, 124, 107, 125, 141, 179, 153, 125, 107, 125, 141,
1258 + 179, 153, 125, 107, 125, 141, 179, 153, 125, 140, 139, 182, 182,
1259 + 152, 136, 152, 136, 153, 136, 139, 111, 136, 139, 111, 0, 0,
1262 + 153, 185, 107, 139, 126, 197, 185, 201, 154, 149, 154, 139, 154,
1263 + 154, 154, 152, 110, 122, 95, 79, 63, 31, 31, 153, 153, 168,
1264 + 140, 198, 79, 124, 138, 94, 153, 111, 149, 107, 167, 154, 154,
1265 + 154, 154, 196, 196, 167, 154, 152, 167, 182, 182, 134, 149, 136,
1266 + 153, 121, 136, 137, 169, 194, 166, 167, 154, 167, 137, 182, 125,
1267 + 110, 94, 110, 95, 79, 125, 111, 110, 78, 110, 111, 111, 95,
1268 + 94, 108, 123, 108, 125, 110, 94, 110, 95, 79, 125, 111, 110,
1269 + 78, 110, 111, 111, 95, 94, 108, 123, 108, 121, 140, 61, 154,
1270 + 107, 167, 91, 122, 107, 167, 139, 139, 155, 154, 139, 153, 139,
1271 + 123, 123, 63, 153, 166, 183, 140, 136, 153, 154, 166, 183, 140,
1272 + 136, 153, 154, 166, 183, 140, 136, 153, 154, 170, 153, 123, 123,
1273 + 107, 121, 107, 121, 167, 151, 183, 140, 151, 183, 140, 0, 0,
1276 + 153, 160, 107, 139, 126, 197, 185, 201, 154, 134, 154, 139, 154,
1277 + 154, 183, 152, 154, 137, 95, 79, 63, 31, 31, 153, 153, 168,
1278 + 169, 198, 79, 224, 167, 122, 153, 111, 149, 92, 167, 154, 154,
1279 + 154, 154, 196, 167, 167, 154, 152, 167, 182, 182, 134, 149, 136,
1280 + 153, 121, 136, 122, 169, 208, 166, 167, 154, 152, 167, 182, 125,
1281 + 110, 124, 110, 95, 94, 125, 111, 111, 79, 125, 126, 111, 111,
1282 + 79, 108, 123, 93, 125, 110, 124, 110, 95, 94, 125, 111, 111,
1283 + 79, 125, 126, 111, 111, 79, 108, 123, 93, 121, 140, 61, 154,
1284 + 107, 167, 91, 107, 107, 167, 139, 139, 170, 154, 139, 153, 139,
1285 + 123, 123, 63, 124, 166, 183, 140, 136, 153, 154, 166, 183, 140,
1286 + 136, 153, 154, 166, 183, 140, 136, 153, 154, 170, 153, 138, 138,
1287 + 122, 121, 122, 121, 167, 151, 183, 140, 151, 183, 140, 0, 0,
1291 +static void write_prob(struct rpivid_dec_env *const de,
1292 + const struct rpivid_dec_state *const s)
1294 + u8 dst[RPI_PROB_ARRAY_SIZE];
1296 + const unsigned int init_type =
1297 + ((s->sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT) != 0 &&
1298 + s->sh->slice_type != HEVC_SLICE_I) ?
1299 + s->sh->slice_type + 1 :
1300 + 2 - s->sh->slice_type;
1301 + const u8 *p = prob_init[init_type];
1302 + const int q = clip_int(s->slice_qp, 0, 51);
1305 + for (i = 0; i < RPI_PROB_VALS; i++) {
1306 + int init_value = p[i];
1307 + int m = (init_value >> 4) * 5 - 45;
1308 + int n = ((init_value & 15) << 3) - 16;
1309 + int pre = 2 * (((m * q) >> 4) + n) - 127;
1313 + pre = 124 + (pre & 1);
1316 + for (i = RPI_PROB_VALS; i != RPI_PROB_ARRAY_SIZE; ++i)
1319 + for (i = 0; i < RPI_PROB_ARRAY_SIZE; i += 4)
1320 + p1_apb_write(de, 0x1000 + i,
1321 + dst[i] + (dst[i + 1] << 8) + (dst[i + 2] << 16) +
1322 + (dst[i + 3] << 24));
1325 +static void write_scaling_factors(struct rpivid_dec_env *const de)
1328 + const u8 *p = (u8 *)de->scaling_factors;
1330 + for (i = 0; i < NUM_SCALING_FACTORS; i += 4, p += 4)
1331 + p1_apb_write(de, 0x2000 + i,
1332 + p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24));
1335 +static inline __u32 dma_to_axi_addr(dma_addr_t a)
1337 + return (__u32)(a >> 6);
1340 +static void write_bitstream(struct rpivid_dec_env *const de,
1341 + const struct rpivid_dec_state *const s)
1343 + // Note that FFmpeg removes emulation prevention bytes, so this is
1344 + // matched in the configuration here.
1345 + // Whether that is the correct behaviour or not is not clear in the
1347 + const int rpi_use_emu = 1;
1348 + unsigned int offset = s->sh->data_bit_offset / 8 + 1;
1349 + const unsigned int len = (s->sh->bit_size + 7) / 8 - offset;
1352 + if (s->src_addr != 0) {
1353 + addr = s->src_addr + offset;
1355 + memcpy(de->bit_copy_gptr->ptr + de->bit_copy_len,
1356 + s->src_buf + offset, len);
1357 + addr = de->bit_copy_gptr->addr + de->bit_copy_len;
1358 + de->bit_copy_len += (len + 63) & ~63;
1360 + offset = addr & 63;
1362 + p1_apb_write(de, RPI_BFBASE, dma_to_axi_addr(addr));
1363 + p1_apb_write(de, RPI_BFNUM, len);
1364 + p1_apb_write(de, RPI_BFCONTROL, offset + (1 << 7)); // Stop
1365 + p1_apb_write(de, RPI_BFCONTROL, offset + (rpi_use_emu << 6));
1368 +//////////////////////////////////////////////////////////////////////////////
1370 +static void write_slice(struct rpivid_dec_env *const de,
1371 + const struct rpivid_dec_state *const s,
1372 + const unsigned int slice_w,
1373 + const unsigned int slice_h)
1375 + u32 u32 = (s->sh->slice_type << 12) +
1377 + V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA) != 0)
1380 + V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA) != 0)
1382 + (slice_w << 17) + (slice_h << 24);
1384 + u32 |= (s->max_num_merge_cand << 0) + (s->nb_refs[L0] << 4) +
1385 + (s->nb_refs[L1] << 8);
1387 + if (s->sh->slice_type == HEVC_SLICE_B)
1388 + u32 |= ((s->sh->flags &
1389 + V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO) != 0)
1391 + p1_apb_write(de, RPI_SLICE, u32);
1394 +//////////////////////////////////////////////////////////////////////////////
1397 +static void new_entry_point(struct rpivid_dec_env *const de,
1398 + const struct rpivid_dec_state *const s,
1400 + const int reset_qp_y, const int ctb_addr_ts)
1402 + int ctb_col = s->ctb_addr_ts_to_rs[ctb_addr_ts] %
1403 + de->pic_width_in_ctbs_y;
1404 + int ctb_row = s->ctb_addr_ts_to_rs[ctb_addr_ts] /
1405 + de->pic_width_in_ctbs_y;
1407 + int tile_x = ctb_to_tile(ctb_col, s->col_bd, s->num_tile_columns);
1408 + int tile_y = ctb_to_tile(ctb_row, s->row_bd, s->num_tile_rows);
1410 + int endx = s->col_bd[tile_x + 1] - 1;
1411 + int endy = s->row_bd[tile_y + 1] - 1;
1413 + u8 slice_w = ctb_to_slice_w_h(ctb_col, 1 << s->log2_ctb_size,
1414 + s->sps.pic_width_in_luma_samples,
1415 + s->col_bd, s->num_tile_columns);
1416 + u8 slice_h = ctb_to_slice_w_h(ctb_row, 1 << s->log2_ctb_size,
1417 + s->sps.pic_height_in_luma_samples,
1418 + s->row_bd, s->num_tile_rows);
1420 + p1_apb_write(de, RPI_TILESTART,
1421 + s->col_bd[tile_x] + (s->row_bd[tile_y] << 16));
1422 + p1_apb_write(de, RPI_TILEEND, endx + (endy << 16));
1425 + p1_apb_write(de, RPI_BEGINTILEEND, endx + (endy << 16));
1427 + write_slice(de, s, slice_w, slice_h);
1430 + unsigned int sps_qp_bd_offset =
1431 + 6 * s->sps.bit_depth_luma_minus8;
1433 + p1_apb_write(de, RPI_QP, sps_qp_bd_offset + s->slice_qp);
1436 + p1_apb_write(de, RPI_MODE,
1437 + (0xFFFF << 0) + (0x0 << 16) +
1438 + ((tile_x == s->num_tile_columns - 1) << 17) +
1439 + ((tile_y == s->num_tile_rows - 1) << 18));
1441 + p1_apb_write(de, RPI_CONTROL, (ctb_col << 0) + (ctb_row << 16));
1444 +//////////////////////////////////////////////////////////////////////////////
1446 +static void new_slice_segment(struct rpivid_dec_env *const de,
1447 + const struct rpivid_dec_state *const s)
1449 + const struct v4l2_ctrl_hevc_sps *const sps = &s->sps;
1450 + const struct v4l2_ctrl_hevc_pps *const pps = &s->pps;
1454 + ((sps->log2_min_luma_coding_block_size_minus3 + 3) << 0) |
1455 + (s->log2_ctb_size << 4) |
1456 + ((sps->log2_min_luma_transform_block_size_minus2 + 2)
1458 + ((sps->log2_min_luma_transform_block_size_minus2 + 2 +
1459 + sps->log2_diff_max_min_luma_transform_block_size)
1461 + ((sps->bit_depth_luma_minus8 + 8) << 16) |
1462 + ((sps->bit_depth_chroma_minus8 + 8) << 20) |
1463 + (sps->max_transform_hierarchy_depth_intra << 24) |
1464 + (sps->max_transform_hierarchy_depth_inter << 28));
1468 + ((sps->pcm_sample_bit_depth_luma_minus1 + 1) << 0) |
1469 + ((sps->pcm_sample_bit_depth_chroma_minus1 + 1) << 4) |
1470 + ((sps->log2_min_pcm_luma_coding_block_size_minus3 + 3)
1472 + ((sps->log2_min_pcm_luma_coding_block_size_minus3 + 3 +
1473 + sps->log2_diff_max_min_pcm_luma_coding_block_size)
1475 + (((sps->flags & V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE) ?
1476 + 0 : sps->chroma_format_idc) << 16) |
1477 + ((!!(sps->flags & V4L2_HEVC_SPS_FLAG_AMP_ENABLED)) << 18) |
1478 + ((!!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED)) << 19) |
1479 + ((!!(sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED))
1482 + V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED))
1487 + ((s->log2_ctb_size - pps->diff_cu_qp_delta_depth) << 0) |
1488 + ((!!(pps->flags & V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED))
1491 + V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED))
1493 + ((!!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED))
1496 + V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED))
1498 + (((pps->pps_cb_qp_offset + s->sh->slice_cb_qp_offset) & 255)
1500 + (((pps->pps_cr_qp_offset + s->sh->slice_cr_qp_offset) & 255)
1503 + V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED))
1506 + if ((sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) != 0)
1507 + write_scaling_factors(de);
1509 + if (!s->dependent_slice_segment_flag) {
1510 + int ctb_col = s->sh->slice_segment_addr %
1511 + de->pic_width_in_ctbs_y;
1512 + int ctb_row = s->sh->slice_segment_addr /
1513 + de->pic_width_in_ctbs_y;
1515 + de->reg_slicestart = (ctb_col << 0) + (ctb_row << 16);
1518 + p1_apb_write(de, RPI_SLICESTART, de->reg_slicestart);
1521 +//////////////////////////////////////////////////////////////////////////////
1524 +static void msg_slice(struct rpivid_dec_env *const de, const u16 msg)
1526 + de->slice_msgs[de->num_slice_msgs++] = msg;
1529 +static void program_slicecmds(struct rpivid_dec_env *const de,
1530 + const int sliceid)
1534 + p1_apb_write(de, RPI_SLICECMDS, de->num_slice_msgs + (sliceid << 8));
1536 + for (i = 0; i < de->num_slice_msgs; i++)
1537 + p1_apb_write(de, 0x4000 + 4 * i, de->slice_msgs[i] & 0xffff);
1540 +// NoBackwardPredictionFlag 8.3.5
1541 +// Simply checks POCs
1542 +static int has_backward(const struct v4l2_hevc_dpb_entry *const dpb,
1543 + const __u8 *const idx, const unsigned int n,
1544 + const unsigned int cur_poc)
1548 + for (i = 0; i < n; ++i) {
1549 + // Compare mod 2^16
1550 + // We only get u16 pocs & 8.3.1 says
1551 + // "The bitstream shall not contain data that result in values
1552 + // of DiffPicOrderCnt( picA, picB ) used in the decoding
1553 + // process that are not in the range of −2^15 to 2^15 − 1,
1555 + if (((cur_poc - dpb[idx[i]].pic_order_cnt[0]) & 0x8000) != 0)
1561 +static void pre_slice_decode(struct rpivid_dec_env *const de,
1562 + const struct rpivid_dec_state *const s)
1564 + const struct v4l2_ctrl_hevc_slice_params *const sh = s->sh;
1565 + int weighted_pred_flag, idx;
1567 + unsigned int collocated_from_l0_flag;
1569 + de->num_slice_msgs = 0;
1572 + if (sh->slice_type == HEVC_SLICE_I)
1574 + if (sh->slice_type == HEVC_SLICE_P)
1576 + if (sh->slice_type == HEVC_SLICE_B)
1579 + cmd_slice |= (s->nb_refs[L0] << 2) | (s->nb_refs[L1] << 6) |
1580 + (s->max_num_merge_cand << 11);
1582 + collocated_from_l0_flag =
1583 + !s->slice_temporal_mvp ||
1584 + sh->slice_type != HEVC_SLICE_B ||
1585 + (sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0);
1586 + cmd_slice |= collocated_from_l0_flag << 14;
1588 + if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
1589 + // Flag to say all reference pictures are from the past
1590 + const int no_backward_pred_flag =
1591 + has_backward(sh->dpb, sh->ref_idx_l0, s->nb_refs[L0],
1592 + sh->slice_pic_order_cnt) &&
1593 + has_backward(sh->dpb, sh->ref_idx_l1, s->nb_refs[L1],
1594 + sh->slice_pic_order_cnt);
1595 + cmd_slice |= no_backward_pred_flag << 10;
1596 + msg_slice(de, cmd_slice);
1598 + if (s->slice_temporal_mvp) {
1599 + const __u8 *const rpl = collocated_from_l0_flag ?
1600 + sh->ref_idx_l0 : sh->ref_idx_l1;
1601 + de->dpbno_col = rpl[sh->collocated_ref_idx];
1602 + //v4l2_info(&de->ctx->dev->v4l2_dev,
1603 + // "L0=%d col_ref_idx=%d,
1604 + // dpb_no=%d\n", collocated_from_l0_flag,
1605 + // sh->collocated_ref_idx, de->dpbno_col);
1608 + // Write reference picture descriptions
1609 + weighted_pred_flag =
1610 + sh->slice_type == HEVC_SLICE_P ?
1611 + !!(s->pps.flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED) :
1612 + !!(s->pps.flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED);
1614 + for (idx = 0; idx < s->nb_refs[L0]; ++idx) {
1615 + unsigned int dpb_no = sh->ref_idx_l0[idx];
1616 + //v4l2_info(&de->ctx->dev->v4l2_dev,
1617 + // "L0[%d]=dpb[%d]\n", idx, dpb_no);
1621 + (sh->dpb[dpb_no].rps ==
1622 + V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR ?
1624 + (weighted_pred_flag ? (3 << 5) : 0));
1625 + msg_slice(de, sh->dpb[dpb_no].pic_order_cnt[0]);
1627 + if (weighted_pred_flag) {
1628 + const struct v4l2_hevc_pred_weight_table
1629 + *const w = &sh->pred_weight_table;
1630 + const int luma_weight_denom =
1631 + (1 << w->luma_log2_weight_denom);
1632 + const unsigned int chroma_log2_weight_denom =
1633 + (w->luma_log2_weight_denom +
1634 + w->delta_chroma_log2_weight_denom);
1635 + const int chroma_weight_denom =
1636 + (1 << chroma_log2_weight_denom);
1639 + w->luma_log2_weight_denom |
1640 + (((w->delta_luma_weight_l0[idx] +
1641 + luma_weight_denom) & 0x1ff)
1643 + msg_slice(de, w->luma_offset_l0[idx] & 0xff);
1645 + chroma_log2_weight_denom |
1646 + (((w->delta_chroma_weight_l0[idx][0] +
1647 + chroma_weight_denom) & 0x1ff)
1650 + w->chroma_offset_l0[idx][0] & 0xff);
1652 + chroma_log2_weight_denom |
1653 + (((w->delta_chroma_weight_l0[idx][1] +
1654 + chroma_weight_denom) & 0x1ff)
1657 + w->chroma_offset_l0[idx][1] & 0xff);
1661 + for (idx = 0; idx < s->nb_refs[L1]; ++idx) {
1662 + unsigned int dpb_no = sh->ref_idx_l1[idx];
1663 + //v4l2_info(&de->ctx->dev->v4l2_dev,
1664 + // "L1[%d]=dpb[%d]\n", idx, dpb_no);
1667 + (sh->dpb[dpb_no].rps ==
1668 + V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR ?
1670 + (weighted_pred_flag ? (3 << 5) : 0));
1671 + msg_slice(de, sh->dpb[dpb_no].pic_order_cnt[0]);
1672 + if (weighted_pred_flag) {
1673 + const struct v4l2_hevc_pred_weight_table
1674 + *const w = &sh->pred_weight_table;
1675 + const int luma_weight_denom =
1676 + (1 << w->luma_log2_weight_denom);
1677 + const unsigned int chroma_log2_weight_denom =
1678 + (w->luma_log2_weight_denom +
1679 + w->delta_chroma_log2_weight_denom);
1680 + const int chroma_weight_denom =
1681 + (1 << chroma_log2_weight_denom);
1684 + w->luma_log2_weight_denom |
1685 + (((w->delta_luma_weight_l1[idx] +
1686 + luma_weight_denom) & 0x1ff) << 3));
1687 + msg_slice(de, w->luma_offset_l1[idx] & 0xff);
1689 + chroma_log2_weight_denom |
1690 + (((w->delta_chroma_weight_l1[idx][0] +
1691 + chroma_weight_denom) & 0x1ff)
1694 + w->chroma_offset_l1[idx][0] & 0xff);
1696 + chroma_log2_weight_denom |
1697 + (((w->delta_chroma_weight_l1[idx][1] +
1698 + chroma_weight_denom) & 0x1ff)
1701 + w->chroma_offset_l1[idx][1] & 0xff);
1705 + msg_slice(de, cmd_slice);
1709 + (sh->slice_beta_offset_div2 & 15) |
1710 + ((sh->slice_tc_offset_div2 & 15) << 4) |
1712 + V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED) ?
1715 + V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED) ?
1718 + V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED) ?
1721 + msg_slice(de, ((sh->slice_cr_qp_offset & 31) << 5) +
1722 + (sh->slice_cb_qp_offset & 31)); // CMD_QPOFF
1725 +//////////////////////////////////////////////////////////////////////////////
1726 +// Write STATUS register with expected end CTU address of previous slice
1728 +static void end_previous_slice(struct rpivid_dec_env *const de,
1729 + const struct rpivid_dec_state *const s,
1730 + const int ctb_addr_ts)
1733 + s->ctb_addr_ts_to_rs[ctb_addr_ts - 1] % de->pic_width_in_ctbs_y;
1735 + s->ctb_addr_ts_to_rs[ctb_addr_ts - 1] / de->pic_width_in_ctbs_y;
1737 + p1_apb_write(de, RPI_STATUS, 1 + (last_x << 5) + (last_y << 18));
1740 +static void wpp_pause(struct rpivid_dec_env *const de, int ctb_row)
1742 + p1_apb_write(de, RPI_STATUS, (ctb_row << 18) + 0x25);
1743 + p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
1744 + p1_apb_write(de, RPI_MODE,
1745 + ctb_row == de->pic_height_in_ctbs_y - 1 ?
1746 + 0x70000 : 0x30000);
1747 + p1_apb_write(de, RPI_CONTROL, (ctb_row << 16) + 2);
1750 +static void wpp_end_previous_slice(struct rpivid_dec_env *const de,
1751 + const struct rpivid_dec_state *const s,
1754 + int new_x = s->sh->slice_segment_addr % de->pic_width_in_ctbs_y;
1755 + int new_y = s->sh->slice_segment_addr / de->pic_width_in_ctbs_y;
1757 + s->ctb_addr_ts_to_rs[ctb_addr_ts - 1] % de->pic_width_in_ctbs_y;
1759 + s->ctb_addr_ts_to_rs[ctb_addr_ts - 1] / de->pic_width_in_ctbs_y;
1761 + if (de->wpp_entry_x < 2 && (de->wpp_entry_y < new_y || new_x > 2) &&
1762 + de->pic_width_in_ctbs_y > 2)
1763 + wpp_pause(de, last_y);
1764 + p1_apb_write(de, RPI_STATUS, 1 + (last_x << 5) + (last_y << 18));
1765 + if (new_x == 2 || (de->pic_width_in_ctbs_y == 2 &&
1766 + de->wpp_entry_y < new_y))
1767 + p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
1770 +//////////////////////////////////////////////////////////////////////////////
1773 +static void wpp_entry_point(struct rpivid_dec_env *const de,
1774 + const struct rpivid_dec_state *const s,
1776 + const int reset_qp_y, const int ctb_addr_ts)
1778 + int ctb_size = 1 << s->log2_ctb_size;
1779 + int ctb_addr_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts];
1781 + int ctb_col = de->wpp_entry_x = ctb_addr_rs % de->pic_width_in_ctbs_y;
1782 + int ctb_row = de->wpp_entry_y = ctb_addr_rs / de->pic_width_in_ctbs_y;
1784 + int endx = de->pic_width_in_ctbs_y - 1;
1785 + int endy = ctb_row;
1787 + u8 slice_w = ctb_to_slice_w_h(ctb_col, ctb_size,
1788 + s->sps.pic_width_in_luma_samples,
1789 + s->col_bd, s->num_tile_columns);
1790 + u8 slice_h = ctb_to_slice_w_h(ctb_row, ctb_size,
1791 + s->sps.pic_height_in_luma_samples,
1792 + s->row_bd, s->num_tile_rows);
1794 + p1_apb_write(de, RPI_TILESTART, 0);
1795 + p1_apb_write(de, RPI_TILEEND, endx + (endy << 16));
1798 + p1_apb_write(de, RPI_BEGINTILEEND, endx + (endy << 16));
1800 + write_slice(de, s, slice_w,
1801 + ctb_row == de->pic_height_in_ctbs_y - 1 ?
1802 + slice_h : ctb_size);
1805 + unsigned int sps_qp_bd_offset =
1806 + 6 * s->sps.bit_depth_luma_minus8;
1808 + p1_apb_write(de, RPI_QP, sps_qp_bd_offset + s->slice_qp);
1811 + p1_apb_write(de, RPI_MODE,
1812 + ctb_row == de->pic_height_in_ctbs_y - 1 ?
1813 + 0x60001 : 0x20001);
1814 + p1_apb_write(de, RPI_CONTROL, (ctb_col << 0) + (ctb_row << 16));
1817 +//////////////////////////////////////////////////////////////////////////////
1820 +static void wpp_decode_slice(struct rpivid_dec_env *const de,
1821 + const struct rpivid_dec_state *const s,
1822 + const struct v4l2_ctrl_hevc_slice_params *sh,
1825 + int i, reset_qp_y = 1;
1826 + int indep = !s->dependent_slice_segment_flag;
1827 + int ctb_col = s->sh->slice_segment_addr % de->pic_width_in_ctbs_y;
1830 + wpp_end_previous_slice(de, s, ctb_addr_ts);
1831 + pre_slice_decode(de, s);
1832 + write_bitstream(de, s);
1833 + if (ctb_addr_ts == 0 || indep || de->pic_width_in_ctbs_y == 1)
1834 + write_prob(de, s);
1835 + else if (ctb_col == 0)
1836 + p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD);
1839 + program_slicecmds(de, s->slice_idx);
1840 + new_slice_segment(de, s);
1841 + wpp_entry_point(de, s, indep, reset_qp_y, ctb_addr_ts);
1843 + for (i = 0; i < s->sh->num_entry_point_offsets; i++) {
1844 + int ctb_addr_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts];
1845 + int ctb_row = ctb_addr_rs / de->pic_width_in_ctbs_y;
1846 + int last_x = de->pic_width_in_ctbs_y - 1;
1848 + if (de->pic_width_in_ctbs_y > 2)
1849 + wpp_pause(de, ctb_row);
1850 + p1_apb_write(de, RPI_STATUS,
1851 + (ctb_row << 18) + (last_x << 5) + 2);
1852 + if (de->pic_width_in_ctbs_y == 2)
1853 + p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
1854 + if (de->pic_width_in_ctbs_y == 1)
1855 + write_prob(de, s);
1857 + p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD);
1858 + ctb_addr_ts += s->column_width[0];
1859 + wpp_entry_point(de, s, 0, 1, ctb_addr_ts);
1863 +//////////////////////////////////////////////////////////////////////////////
1866 +static void decode_slice(struct rpivid_dec_env *const de,
1867 + const struct rpivid_dec_state *const s,
1868 + const struct v4l2_ctrl_hevc_slice_params *const sh,
1871 + int i, reset_qp_y;
1874 + end_previous_slice(de, s, ctb_addr_ts);
1876 + pre_slice_decode(de, s);
1877 + write_bitstream(de, s);
1879 +#if DEBUG_TRACE_P1_CMD
1881 + v4l2_info(&de->ctx->dev->v4l2_dev,
1882 + "TS=%d, tile=%d/%d, dss=%d, flags=%#llx\n",
1883 + ctb_addr_ts, s->tile_id[ctb_addr_ts],
1884 + s->tile_id[ctb_addr_ts - 1],
1885 + s->dependent_slice_segment_flag, sh->flags);
1889 + reset_qp_y = ctb_addr_ts == 0 ||
1890 + s->tile_id[ctb_addr_ts] != s->tile_id[ctb_addr_ts - 1] ||
1891 + !s->dependent_slice_segment_flag;
1893 + write_prob(de, s);
1895 + program_slicecmds(de, s->slice_idx);
1896 + new_slice_segment(de, s);
1897 + new_entry_point(de, s, !s->dependent_slice_segment_flag, reset_qp_y,
1900 + for (i = 0; i < s->sh->num_entry_point_offsets; i++) {
1901 + int ctb_addr_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts];
1902 + int ctb_col = ctb_addr_rs % de->pic_width_in_ctbs_y;
1903 + int ctb_row = ctb_addr_rs / de->pic_width_in_ctbs_y;
1904 + int tile_x = ctb_to_tile(ctb_col, s->col_bd,
1905 + s->num_tile_columns - 1);
1907 + ctb_to_tile(ctb_row, s->row_bd, s->num_tile_rows - 1);
1908 + int last_x = s->col_bd[tile_x + 1] - 1;
1909 + int last_y = s->row_bd[tile_y + 1] - 1;
1911 + p1_apb_write(de, RPI_STATUS,
1912 + 2 + (last_x << 5) + (last_y << 18));
1913 + write_prob(de, s);
1914 + ctb_addr_ts += s->column_width[tile_x] * s->row_height[tile_y];
1915 + new_entry_point(de, s, 0, 1, ctb_addr_ts);
1919 +//////////////////////////////////////////////////////////////////////////////
1922 +static void expand_scaling_list(const unsigned int size_id,
1923 + const unsigned int matrix_id, u8 *const dst0,
1924 + const u8 *const src0, uint8_t dc)
1927 + unsigned int x, y;
1929 + // FIXME: matrix_id is unused ?
1930 + switch (size_id) {
1932 + memcpy(dst0, src0, 16);
1935 + memcpy(dst0, src0, 64);
1940 + for (y = 0; y != 16; y++) {
1941 + const u8 *s = src0 + (y >> 1) * 8;
1943 + for (x = 0; x != 8; ++x) {
1953 + for (y = 0; y != 32; y++) {
1954 + const u8 *s = src0 + (y >> 2) * 8;
1956 + for (x = 0; x != 8; ++x) {
1968 +static void populate_scaling_factors(const struct rpivid_run *const run,
1969 + struct rpivid_dec_env *const de,
1970 + const struct rpivid_dec_state *const s)
1972 + const struct v4l2_ctrl_hevc_scaling_matrix *const sl =
1973 + run->h265.scaling_matrix;
1974 + // Array of constants for scaling factors
1975 + static const u32 scaling_factor_offsets[4][6] = {
1976 + // MID0 MID1 MID2 MID3 MID4 MID5
1978 + { 0x0000, 0x0010, 0x0020, 0x0030, 0x0040, 0x0050 },
1980 + { 0x0060, 0x00A0, 0x00E0, 0x0120, 0x0160, 0x01A0 },
1982 + { 0x01E0, 0x02E0, 0x03E0, 0x04E0, 0x05E0, 0x06E0 },
1984 + { 0x07E0, 0x0BE0, 0x0000, 0x0000, 0x0000, 0x0000 }
1989 + for (mid = 0; mid < 6; mid++)
1990 + expand_scaling_list(0, mid,
1991 + de->scaling_factors +
1992 + scaling_factor_offsets[0][mid],
1993 + sl->scaling_list_4x4[mid], 0);
1994 + for (mid = 0; mid < 6; mid++)
1995 + expand_scaling_list(1, mid,
1996 + de->scaling_factors +
1997 + scaling_factor_offsets[1][mid],
1998 + sl->scaling_list_8x8[mid], 0);
1999 + for (mid = 0; mid < 6; mid++)
2000 + expand_scaling_list(2, mid,
2001 + de->scaling_factors +
2002 + scaling_factor_offsets[2][mid],
2003 + sl->scaling_list_16x16[mid],
2004 + sl->scaling_list_dc_coef_16x16[mid]);
2005 + for (mid = 0; mid < 2; mid += 1)
2006 + expand_scaling_list(3, mid,
2007 + de->scaling_factors +
2008 + scaling_factor_offsets[3][mid],
2009 + sl->scaling_list_32x32[mid],
2010 + sl->scaling_list_dc_coef_32x32[mid]);
2013 +static void free_ps_info(struct rpivid_dec_state *const s)
2015 + kfree(s->ctb_addr_rs_to_ts);
2016 + s->ctb_addr_rs_to_ts = NULL;
2017 + kfree(s->ctb_addr_ts_to_rs);
2018 + s->ctb_addr_ts_to_rs = NULL;
2019 + kfree(s->tile_id);
2020 + s->tile_id = NULL;
2028 +static int updated_ps(struct rpivid_dec_state *const s)
2030 + unsigned int ctb_addr_rs;
2031 + int j, x, y, tile_id;
2036 + // Inferred parameters
2037 + s->log2_ctb_size = s->sps.log2_min_luma_coding_block_size_minus3 + 3 +
2038 + s->sps.log2_diff_max_min_luma_coding_block_size;
2040 + s->ctb_width = (s->sps.pic_width_in_luma_samples +
2041 + (1 << s->log2_ctb_size) - 1) >>
2043 + s->ctb_height = (s->sps.pic_height_in_luma_samples +
2044 + (1 << s->log2_ctb_size) - 1) >>
2046 + s->ctb_size = s->ctb_width * s->ctb_height;
2048 + // Inferred parameters
2050 + if (!(s->pps.flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED)) {
2051 + s->num_tile_columns = 1;
2052 + s->num_tile_rows = 1;
2053 + s->column_width[0] = s->ctb_width;
2054 + s->row_height[0] = s->ctb_height;
2056 + s->num_tile_columns = s->pps.num_tile_columns_minus1 + 1;
2057 + s->num_tile_rows = s->pps.num_tile_rows_minus1 + 1;
2058 + for (i = 0; i < s->num_tile_columns; ++i)
2059 + s->column_width[i] = s->pps.column_width_minus1[i] + 1;
2060 + for (i = 0; i < s->num_tile_rows; ++i)
2061 + s->row_height[i] = s->pps.row_height_minus1[i] + 1;
2064 + s->col_bd = kmalloc((s->num_tile_columns + 1) * sizeof(*s->col_bd),
2066 + s->row_bd = kmalloc((s->num_tile_rows + 1) * sizeof(*s->row_bd),
2070 + for (i = 0; i < s->num_tile_columns; i++)
2071 + s->col_bd[i + 1] = s->col_bd[i] + s->column_width[i];
2074 + for (i = 0; i < s->num_tile_rows; i++)
2075 + s->row_bd[i + 1] = s->row_bd[i] + s->row_height[i];
2077 + s->ctb_addr_rs_to_ts = kmalloc_array(s->ctb_size,
2078 + sizeof(*s->ctb_addr_rs_to_ts),
2080 + s->ctb_addr_ts_to_rs = kmalloc_array(s->ctb_size,
2081 + sizeof(*s->ctb_addr_ts_to_rs),
2083 + s->tile_id = kmalloc_array(s->ctb_size, sizeof(*s->tile_id),
2086 + for (ctb_addr_rs = 0; ctb_addr_rs < s->ctb_size; ctb_addr_rs++) {
2087 + int tb_x = ctb_addr_rs % s->ctb_width;
2088 + int tb_y = ctb_addr_rs / s->ctb_width;
2093 + for (i = 0; i < s->num_tile_columns; i++) {
2094 + if (tb_x < s->col_bd[i + 1]) {
2100 + for (i = 0; i < s->num_tile_rows; i++) {
2101 + if (tb_y < s->row_bd[i + 1]) {
2107 + for (i = 0; i < tile_x; i++)
2108 + val += s->row_height[tile_y] * s->column_width[i];
2109 + for (i = 0; i < tile_y; i++)
2110 + val += s->ctb_width * s->row_height[i];
2112 + val += (tb_y - s->row_bd[tile_y]) * s->column_width[tile_x] +
2113 + tb_x - s->col_bd[tile_x];
2115 + s->ctb_addr_rs_to_ts[ctb_addr_rs] = val;
2116 + s->ctb_addr_ts_to_rs[val] = ctb_addr_rs;
2119 + for (j = 0, tile_id = 0; j < s->num_tile_rows; j++)
2120 + for (i = 0; i < s->num_tile_columns; i++, tile_id++)
2121 + for (y = s->row_bd[j]; y < s->row_bd[j + 1]; y++)
2122 + for (x = s->col_bd[i];
2123 + x < s->col_bd[i + 1];
2125 + s->tile_id[s->ctb_addr_rs_to_ts
2126 + [y * s->ctb_width +
2132 +static int frame_end(struct rpivid_dev *const dev,
2133 + struct rpivid_dec_env *const de,
2134 + const struct rpivid_dec_state *const s)
2136 + const unsigned int last_x = s->col_bd[s->num_tile_columns] - 1;
2137 + const unsigned int last_y = s->row_bd[s->num_tile_rows] - 1;
2140 + if (s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED) {
2141 + if (de->wpp_entry_x < 2 && de->pic_width_in_ctbs_y > 2)
2142 + wpp_pause(de, last_y);
2144 + p1_apb_write(de, RPI_STATUS, 1 + (last_x << 5) + (last_y << 18));
2146 + // Copy commands out to dma buf
2147 + cmd_size = de->cmd_len * sizeof(de->cmd_fifo[0]);
2149 + if (!de->cmd_copy_gptr->ptr || cmd_size > de->cmd_copy_gptr->size) {
2150 + size_t cmd_alloc = round_up_size(cmd_size);
2152 + if (gptr_realloc_new(dev, de->cmd_copy_gptr, cmd_alloc)) {
2153 + v4l2_err(&dev->v4l2_dev,
2154 + "Alloc cmd buffer (%d): FAILED\n", cmd_alloc);
2157 + v4l2_info(&dev->v4l2_dev, "Alloc cmd buffer (%d): OK\n",
2161 + memcpy(de->cmd_copy_gptr->ptr, de->cmd_fifo, cmd_size);
2165 +static void setup_colmv(struct rpivid_ctx *const ctx, struct rpivid_run *run,
2166 + struct rpivid_dec_state *const s)
2168 + ctx->colmv_stride = ALIGN(s->sps.pic_width_in_luma_samples, 64);
2169 + ctx->colmv_picsize = ctx->colmv_stride *
2170 + (ALIGN(s->sps.pic_height_in_luma_samples, 64) >> 4);
2173 +// Can be called from irq context
2174 +static struct rpivid_dec_env *dec_env_new(struct rpivid_ctx *const ctx)
2176 + struct rpivid_dec_env *de;
2177 + unsigned long lock_flags;
2179 + spin_lock_irqsave(&ctx->dec_lock, lock_flags);
2181 + de = ctx->dec_free;
2183 + ctx->dec_free = de->next;
2185 + de->state = RPIVID_DECODE_SLICE_START;
2188 + spin_unlock_irqrestore(&ctx->dec_lock, lock_flags);
2192 +// Can be called from irq context
2193 +static void dec_env_delete(struct rpivid_dec_env *const de)
2195 + struct rpivid_ctx * const ctx = de->ctx;
2196 + unsigned long lock_flags;
2198 + aux_q_release(ctx, &de->frame_aux);
2199 + aux_q_release(ctx, &de->col_aux);
2201 + spin_lock_irqsave(&ctx->dec_lock, lock_flags);
2203 + de->state = RPIVID_DECODE_END;
2204 + de->next = ctx->dec_free;
2205 + ctx->dec_free = de;
2207 + spin_unlock_irqrestore(&ctx->dec_lock, lock_flags);
2210 +static void dec_env_uninit(struct rpivid_ctx *const ctx)
2214 + if (ctx->dec_pool) {
2215 + for (i = 0; i != RPIVID_DEC_ENV_COUNT; ++i) {
2216 + struct rpivid_dec_env *const de = ctx->dec_pool + i;
2218 + kfree(de->cmd_fifo);
2221 + kfree(ctx->dec_pool);
2224 + ctx->dec_pool = NULL;
2225 + ctx->dec_free = NULL;
2228 +static int dec_env_init(struct rpivid_ctx *const ctx)
2232 + ctx->dec_pool = kzalloc(sizeof(*ctx->dec_pool) * RPIVID_DEC_ENV_COUNT,
2234 + if (!ctx->dec_pool)
2237 + spin_lock_init(&ctx->dec_lock);
2239 + // Build free chain
2240 + ctx->dec_free = ctx->dec_pool;
2241 + for (i = 0; i != RPIVID_DEC_ENV_COUNT - 1; ++i)
2242 + ctx->dec_pool[i].next = ctx->dec_pool + i + 1;
2244 + // Fill in other bits
2245 + for (i = 0; i != RPIVID_DEC_ENV_COUNT; ++i) {
2246 + struct rpivid_dec_env *const de = ctx->dec_pool + i;
2249 + de->decode_order = i;
2250 + de->cmd_max = 1024;
2251 + de->cmd_fifo = kmalloc_array(de->cmd_max,
2252 + sizeof(struct rpi_cmd),
2254 + if (!de->cmd_fifo)
2261 + dec_env_uninit(ctx);
2265 +// Assume that we get exactly the same DPB for every slice
2266 +// it makes no real sense otherwise
2267 +#if V4L2_HEVC_DPB_ENTRIES_NUM_MAX > 16
2268 +#error HEVC_DPB_ENTRIES > h/w slots
2271 +static u32 mk_config2(const struct rpivid_dec_state *const s)
2273 + const struct v4l2_ctrl_hevc_sps *const sps = &s->sps;
2274 + const struct v4l2_ctrl_hevc_pps *const pps = &s->pps;
2277 + c = (sps->bit_depth_luma_minus8 + 8) << 0;
2279 + c |= (sps->bit_depth_chroma_minus8 + 8) << 4;
2281 + if (sps->bit_depth_luma_minus8)
2284 + if (sps->bit_depth_chroma_minus8)
2286 + c |= s->log2_ctb_size << 10;
2287 + if (pps->flags & V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED)
2289 + if (sps->flags & V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED)
2291 + if (sps->flags & V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED)
2292 + c |= BIT(15); /* Write motion vectors to external memory */
2293 + c |= (pps->log2_parallel_merge_level_minus2 + 2) << 16;
2294 + if (s->slice_temporal_mvp)
2296 + if (sps->flags & V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED)
2298 + c |= (pps->pps_cb_qp_offset & 31) << 21;
2299 + c |= (pps->pps_cr_qp_offset & 31) << 26;
2303 +static void rpivid_h265_setup(struct rpivid_ctx *ctx, struct rpivid_run *run)
2305 + struct rpivid_dev *const dev = ctx->dev;
2306 + const struct v4l2_ctrl_hevc_slice_params *const sh =
2307 + run->h265.slice_params;
2308 + const struct v4l2_hevc_pred_weight_table *pred_weight_table;
2309 + struct rpivid_q_aux *dpb_q_aux[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
2310 + struct rpivid_dec_state *const s = ctx->state;
2311 + struct vb2_queue *vq;
2312 + struct rpivid_dec_env *de;
2316 + bool slice_temporal_mvp;
2318 + pred_weight_table = &sh->pred_weight_table;
2321 + ((run->src->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF) == 0);
2324 + slice_temporal_mvp = (sh->flags &
2325 + V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED);
2327 + if (de && de->state != RPIVID_DECODE_END) {
2330 + switch (de->state) {
2331 + case RPIVID_DECODE_SLICE_CONTINUE:
2335 + v4l2_err(&dev->v4l2_dev, "%s: Unexpected state: %d\n",
2336 + __func__, de->state);
2338 + case RPIVID_DECODE_ERROR_CONTINUE:
2339 + // Uncleared error - fail now
2343 + if (s->slice_temporal_mvp != slice_temporal_mvp) {
2344 + v4l2_warn(&dev->v4l2_dev,
2345 + "Slice Temporal MVP non-constant\n");
2350 + unsigned int ctb_size_y;
2351 + bool sps_changed = false;
2353 + if (memcmp(&s->sps, run->h265.sps, sizeof(s->sps)) != 0) {
2355 + v4l2_info(&dev->v4l2_dev, "SPS changed\n");
2356 + memcpy(&s->sps, run->h265.sps, sizeof(s->sps));
2357 + sps_changed = true;
2359 + if (sps_changed ||
2360 + memcmp(&s->pps, run->h265.pps, sizeof(s->pps)) != 0) {
2362 + v4l2_info(&dev->v4l2_dev, "PPS changed\n");
2363 + memcpy(&s->pps, run->h265.pps, sizeof(s->pps));
2365 + /* Recalc stuff as required */
2369 + de = dec_env_new(ctx);
2371 + v4l2_err(&dev->v4l2_dev,
2372 + "Failed to find free decode env\n");
2378 + 1U << (s->sps.log2_min_luma_coding_block_size_minus3 +
2380 + s->sps.log2_diff_max_min_luma_coding_block_size);
2382 + de->pic_width_in_ctbs_y =
2383 + (s->sps.pic_width_in_luma_samples + ctb_size_y - 1) /
2384 + ctb_size_y; // 7-15
2385 + de->pic_height_in_ctbs_y =
2386 + (s->sps.pic_height_in_luma_samples + ctb_size_y - 1) /
2387 + ctb_size_y; // 7-17
2389 + de->dpbno_col = ~0U;
2391 + de->bit_copy_gptr = ctx->bitbufs + 0;
2392 + de->bit_copy_len = 0;
2393 + de->cmd_copy_gptr = ctx->cmdbufs + 0;
2395 + de->frame_c_offset = ctx->dst_fmt.height * 128;
2396 + de->frame_stride = ctx->dst_fmt.bytesperline * 128;
2398 + vb2_dma_contig_plane_dma_addr(&run->dst->vb2_buf, 0);
2399 + de->frame_aux = NULL;
2401 + if (s->sps.bit_depth_luma_minus8 !=
2402 + s->sps.bit_depth_chroma_minus8) {
2403 + v4l2_warn(&dev->v4l2_dev,
2404 + "Chroma depth (%d) != Luma depth (%d)\n",
2405 + s->sps.bit_depth_chroma_minus8 + 8,
2406 + s->sps.bit_depth_luma_minus8 + 8);
2409 + if (s->sps.bit_depth_luma_minus8 == 0) {
2410 + if (ctx->dst_fmt.pixelformat !=
2411 + V4L2_PIX_FMT_NV12_COL128) {
2412 + v4l2_err(&dev->v4l2_dev,
2413 + "Pixel format %#x != NV12_COL128 for 8-bit output",
2414 + ctx->dst_fmt.pixelformat);
2417 + } else if (s->sps.bit_depth_luma_minus8 == 2) {
2418 + if (ctx->dst_fmt.pixelformat !=
2419 + V4L2_PIX_FMT_NV12_10_COL128) {
2420 + v4l2_err(&dev->v4l2_dev,
2421 + "Pixel format %#x != NV12_10_COL128 for 10-bit output",
2422 + ctx->dst_fmt.pixelformat);
2426 + v4l2_warn(&dev->v4l2_dev,
2427 + "Luma depth (%d) unsupported\n",
2428 + s->sps.bit_depth_luma_minus8 + 8);
2431 + if (run->dst->vb2_buf.num_planes != 1) {
2432 + v4l2_warn(&dev->v4l2_dev, "Capture planes (%d) != 1\n",
2433 + run->dst->vb2_buf.num_planes);
2436 + if (run->dst->planes[0].length <
2437 + ctx->dst_fmt.sizeimage) {
2438 + v4l2_warn(&dev->v4l2_dev,
2439 + "Capture plane[0] length (%d) < sizeimage (%d)\n",
2440 + run->dst->planes[0].length,
2441 + ctx->dst_fmt.sizeimage);
2445 + if (s->sps.pic_width_in_luma_samples > 4096 ||
2446 + s->sps.pic_height_in_luma_samples > 4096) {
2447 + v4l2_warn(&dev->v4l2_dev,
2448 + "Pic dimension (%dx%d) exeeds 4096\n",
2449 + s->sps.pic_width_in_luma_samples,
2450 + s->sps.pic_height_in_luma_samples);
2454 + // Fill in ref planes with our address s.t. if we mess
2455 + // up refs somehow then we still have a valid address
2457 + for (i = 0; i != 16; ++i)
2458 + de->ref_addrs[i] = de->frame_addr;
2461 + * Stash initial temporal_mvp flag
2462 + * This must be the same for all pic slices (7.4.7.1)
2464 + s->slice_temporal_mvp = slice_temporal_mvp;
2466 + // Phase 2 reg pre-calc
2467 + de->rpi_config2 = mk_config2(s);
2468 + de->rpi_framesize = (s->sps.pic_height_in_luma_samples << 16) |
2469 + s->sps.pic_width_in_luma_samples;
2470 + de->rpi_currpoc = sh->slice_pic_order_cnt;
2472 + if (s->sps.flags &
2473 + V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) {
2474 + setup_colmv(ctx, run, s);
2479 + if (sh->slice_segment_addr != 0) {
2480 + v4l2_warn(&dev->v4l2_dev,
2481 + "New frame but segment_addr=%d\n",
2482 + sh->slice_segment_addr);
2486 + /* Allocate a bitbuf if we need one - don't need one if single
2487 + * slice as we can use the src buf directly
2489 + if (!s->frame_end && !de->bit_copy_gptr->ptr) {
2490 + const size_t wxh = s->sps.pic_width_in_luma_samples *
2491 + s->sps.pic_height_in_luma_samples;
2492 + size_t bits_alloc;
2494 + /* Annex A gives a min compression of 2 @ lvl 3.1
2495 + * (wxh <= 983040) and min 4 thereafter but avoid
2496 + * the oddity of 983041 having a lower limit than
2498 + * Multiply by 3/2 for 4:2:0
2500 + bits_alloc = wxh < 983040 ? wxh * 3 / 4 :
2501 + wxh < 983040 * 2 ? 983040 * 3 / 4 :
2503 + bits_alloc = round_up_size(bits_alloc);
2505 + if (gptr_alloc(dev, de->bit_copy_gptr,
2507 + DMA_ATTR_FORCE_CONTIGUOUS) != 0) {
2508 + v4l2_err(&dev->v4l2_dev,
2509 + "Unable to alloc buf (%d) for bit copy\n",
2513 + v4l2_info(&dev->v4l2_dev,
2514 + "Alloc buf (%d) for bit copy OK\n",
2519 + // Pre calc a few things
2523 + vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf, 0);
2524 + s->src_buf = s->src_addr != 0 ? NULL :
2525 + vb2_plane_vaddr(&run->src->vb2_buf, 0);
2526 + if (!s->src_addr && !s->src_buf) {
2527 + v4l2_err(&dev->v4l2_dev, "Failed to map src buffer\n");
2532 + s->slice_qp = 26 + s->pps.init_qp_minus26 + s->sh->slice_qp_delta;
2533 + s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ?
2535 + (5 - sh->five_minus_max_num_merge_cand);
2536 + // * SH DSS flag invented by me - but clearly needed
2537 + s->dependent_slice_segment_flag =
2539 + V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT) != 0);
2541 + s->nb_refs[0] = (sh->slice_type == HEVC_SLICE_I) ?
2543 + sh->num_ref_idx_l0_active_minus1 + 1;
2544 + s->nb_refs[1] = (sh->slice_type != HEVC_SLICE_B) ?
2546 + sh->num_ref_idx_l1_active_minus1 + 1;
2548 + if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED)
2549 + populate_scaling_factors(run, de, s);
2551 + ctb_addr_ts = s->ctb_addr_rs_to_ts[sh->slice_segment_addr];
2553 + if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED))
2554 + wpp_decode_slice(de, s, sh, ctb_addr_ts);
2556 + decode_slice(de, s, sh, ctb_addr_ts);
2558 + if (!s->frame_end)
2562 + memset(dpb_q_aux, 0,
2563 + sizeof(*dpb_q_aux) * V4L2_HEVC_DPB_ENTRIES_NUM_MAX);
2565 + * Need Aux ents for all (ref) DPB ents if temporal MV could
2566 + * be enabled for any pic
2567 + * ** At the moment we have aux ents for all pics whether or not
2570 + use_aux = ((s->sps.flags &
2571 + V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) != 0);
2573 + // Locate ref frames
2574 + // At least in the current implementation this is constant across all
2575 + // slices. If this changes we will need idx mapping code.
2576 + // Uses sh so here rather than trigger
2578 + vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
2581 + v4l2_err(&dev->v4l2_dev, "VQ gone!\n");
2585 + // v4l2_info(&dev->v4l2_dev, "rpivid_h265_end of frame\n");
2586 + if (frame_end(dev, de, s))
2589 + for (i = 0; i < sh->num_active_dpb_entries; ++i) {
2590 + int buffer_index =
2591 + vb2_find_timestamp(vq, sh->dpb[i].timestamp, 0);
2592 + struct vb2_buffer *buf = buffer_index < 0 ?
2594 + vb2_get_buffer(vq, buffer_index);
2597 + v4l2_warn(&dev->v4l2_dev,
2598 + "Missing DPB ent %d, timestamp=%lld, index=%d\n",
2599 + i, (long long)sh->dpb[i].timestamp,
2605 + dpb_q_aux[i] = aux_q_ref(ctx,
2606 + ctx->aux_ents[buffer_index]);
2607 + if (!dpb_q_aux[i])
2608 + v4l2_warn(&dev->v4l2_dev,
2609 + "Missing DPB AUX ent %d index=%d\n",
2613 + de->ref_addrs[i] =
2614 + vb2_dma_contig_plane_dma_addr(buf, 0);
2617 + // Move DPB from temp
2618 + for (i = 0; i != V4L2_HEVC_DPB_ENTRIES_NUM_MAX; ++i) {
2619 + aux_q_release(ctx, &s->ref_aux[i]);
2620 + s->ref_aux[i] = dpb_q_aux[i];
2622 + // Unref the old frame aux too - it is either in the DPB or not
2624 + aux_q_release(ctx, &s->frame_aux);
2627 + // New frame so new aux ent
2628 + // ??? Do we need this if non-ref ??? can we tell
2629 + s->frame_aux = aux_q_new(ctx, run->dst->vb2_buf.index);
2631 + if (!s->frame_aux) {
2632 + v4l2_err(&dev->v4l2_dev,
2633 + "Failed to obtain aux storage for frame\n");
2637 + de->frame_aux = aux_q_ref(ctx, s->frame_aux);
2640 + if (de->dpbno_col != ~0U) {
2641 + if (de->dpbno_col >= sh->num_active_dpb_entries) {
2642 + v4l2_err(&dev->v4l2_dev,
2643 + "Col ref index %d >= %d\n",
2645 + sh->num_active_dpb_entries);
2647 + // Standard requires that the col pic is
2648 + // constant for the duration of the pic
2649 + // (text of collocated_ref_idx in H265-2 2018
2652 + // Spot the collocated ref in passing
2653 + de->col_aux = aux_q_ref(ctx,
2654 + dpb_q_aux[de->dpbno_col]);
2656 + if (!de->col_aux) {
2657 + v4l2_warn(&dev->v4l2_dev,
2658 + "Missing DPB ent for col\n");
2659 + // Probably need to abort if this fails
2660 + // as P2 may explode on bad data
2666 + de->state = RPIVID_DECODE_PHASE1;
2671 + // Actual error reporting happens in Trigger
2672 + de->state = s->frame_end ? RPIVID_DECODE_ERROR_DONE :
2673 + RPIVID_DECODE_ERROR_CONTINUE;
2676 +//////////////////////////////////////////////////////////////////////////////
2677 +// Handle PU and COEFF stream overflow
2680 +// -1 Phase 1 decode error
2682 +// >0 Out of space (bitmask)
2684 +#define STATUS_COEFF_EXHAUSTED 8
2685 +#define STATUS_PU_EXHAUSTED 16
2687 +static int check_status(const struct rpivid_dev *const dev)
2689 + const u32 cfstatus = apb_read(dev, RPI_CFSTATUS);
2690 + const u32 cfnum = apb_read(dev, RPI_CFNUM);
2691 + u32 status = apb_read(dev, RPI_STATUS);
2693 + // Handle PU and COEFF stream overflow
2695 + // this is the definition of successful completion of phase 1
2696 + // it ensures that the status register is zero and all blocks in each tile
2698 + if (cfstatus == cfnum)
2699 + return 0; //No error
2701 + status &= (STATUS_PU_EXHAUSTED | STATUS_COEFF_EXHAUSTED);
2708 +static void cb_phase2(struct rpivid_dev *const dev, void *v)
2710 + struct rpivid_dec_env *const de = v;
2711 + struct rpivid_ctx *const ctx = de->ctx;
2713 + xtrace_in(dev, de);
2715 + v4l2_m2m_cap_buf_return(dev->m2m_dev, ctx->fh.m2m_ctx, de->frame_buf,
2716 + VB2_BUF_STATE_DONE);
2717 + de->frame_buf = NULL;
2719 + /* Delete de before finish as finish might immediately trigger a reuse
2722 + dec_env_delete(de);
2724 + if (atomic_add_return(-1, &ctx->p2out) >= RPIVID_P2BUF_COUNT - 1) {
2725 + xtrace_fin(dev, de);
2726 + v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
2727 + VB2_BUF_STATE_DONE);
2730 + xtrace_ok(dev, de);
2733 +static void phase2_claimed(struct rpivid_dev *const dev, void *v)
2735 + struct rpivid_dec_env *const de = v;
2738 + xtrace_in(dev, de);
2740 + apb_write_vc_addr(dev, RPI_PURBASE, de->pu_base_vc);
2741 + apb_write_vc_len(dev, RPI_PURSTRIDE, de->pu_stride);
2742 + apb_write_vc_addr(dev, RPI_COEFFRBASE, de->coeff_base_vc);
2743 + apb_write_vc_len(dev, RPI_COEFFRSTRIDE, de->coeff_stride);
2745 + apb_write_vc_addr(dev, RPI_OUTYBASE, de->frame_addr);
2746 + apb_write_vc_addr(dev, RPI_OUTCBASE,
2747 + de->frame_addr + de->frame_c_offset);
2748 + apb_write_vc_len(dev, RPI_OUTYSTRIDE, de->frame_stride);
2749 + apb_write_vc_len(dev, RPI_OUTCSTRIDE, de->frame_stride);
2751 + // v4l2_info(&dev->v4l2_dev, "Frame: Y=%llx, C=%llx, Stride=%x\n",
2752 + // de->frame_addr, de->frame_addr + de->frame_c_offset,
2753 + // de->frame_stride);
2755 + for (i = 0; i < 16; i++) {
2756 + // Strides are in fact unused but fill in anyway
2757 + apb_write_vc_addr(dev, 0x9000 + 16 * i, de->ref_addrs[i]);
2758 + apb_write_vc_len(dev, 0x9004 + 16 * i, de->frame_stride);
2759 + apb_write_vc_addr(dev, 0x9008 + 16 * i,
2760 + de->ref_addrs[i] + de->frame_c_offset);
2761 + apb_write_vc_len(dev, 0x900C + 16 * i, de->frame_stride);
2764 + apb_write(dev, RPI_CONFIG2, de->rpi_config2);
2765 + apb_write(dev, RPI_FRAMESIZE, de->rpi_framesize);
2766 + apb_write(dev, RPI_CURRPOC, de->rpi_currpoc);
2767 + // v4l2_info(&dev->v4l2_dev, "Config2=%#x, FrameSize=%#x, POC=%#x\n",
2768 + // de->rpi_config2, de->rpi_framesize, de->rpi_currpoc);
2770 + // collocated reads/writes
2771 + apb_write_vc_len(dev, RPI_COLSTRIDE,
2772 + de->ctx->colmv_stride); // Read vals
2773 + apb_write_vc_len(dev, RPI_MVSTRIDE,
2774 + de->ctx->colmv_stride); // Write vals
2775 + apb_write_vc_addr(dev, RPI_MVBASE,
2776 + !de->frame_aux ? 0 : de->frame_aux->col.addr);
2777 + apb_write_vc_addr(dev, RPI_COLBASE,
2778 + !de->col_aux ? 0 : de->col_aux->col.addr);
2780 + //v4l2_info(&dev->v4l2_dev,
2781 + // "Mv=%llx, Col=%llx, Stride=%x, Buf=%llx->%llx\n",
2782 + // de->rpi_mvbase, de->rpi_colbase, de->ctx->colmv_stride,
2783 + // de->ctx->colmvbuf.addr, de->ctx->colmvbuf.addr +
2784 + // de->ctx->colmvbuf.size);
2786 + rpivid_hw_irq_active2_irq(dev, &de->irq_ent, cb_phase2, de);
2788 + apb_write_final(dev, RPI_NUMROWS, de->pic_height_in_ctbs_y);
2790 + xtrace_ok(dev, de);
2793 +static void phase1_claimed(struct rpivid_dev *const dev, void *v);
2795 +static void phase1_thread(struct rpivid_dev *const dev, void *v)
2797 + struct rpivid_dec_env *const de = v;
2798 + struct rpivid_ctx *const ctx = de->ctx;
2800 + struct rpivid_gptr *const pu_gptr = ctx->pu_bufs + ctx->p2idx;
2801 + struct rpivid_gptr *const coeff_gptr = ctx->coeff_bufs + ctx->p2idx;
2803 + xtrace_in(dev, de);
2805 + if (de->p1_status & STATUS_PU_EXHAUSTED) {
2806 + if (gptr_realloc_new(dev, pu_gptr, next_size(pu_gptr->size))) {
2807 + v4l2_err(&dev->v4l2_dev,
2808 + "%s: PU realloc (%#x) failed\n",
2809 + __func__, pu_gptr->size);
2812 + v4l2_info(&dev->v4l2_dev, "%s: PU realloc (%#x) OK\n",
2813 + __func__, pu_gptr->size);
2816 + if (de->p1_status & STATUS_COEFF_EXHAUSTED) {
2817 + if (gptr_realloc_new(dev, coeff_gptr,
2818 + next_size(coeff_gptr->size))) {
2819 + v4l2_err(&dev->v4l2_dev,
2820 + "%s: Coeff realloc (%#x) failed\n",
2821 + __func__, coeff_gptr->size);
2824 + v4l2_info(&dev->v4l2_dev, "%s: Coeff realloc (%#x) OK\n",
2825 + __func__, coeff_gptr->size);
2828 + phase1_claimed(dev, de);
2829 + xtrace_ok(dev, de);
2833 + dec_env_delete(de);
2834 + xtrace_fin(dev, de);
2835 + v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
2836 + VB2_BUF_STATE_ERROR);
2837 + xtrace_fail(dev, de);
2840 +/* Always called in irq context (this is good) */
2841 +static void cb_phase1(struct rpivid_dev *const dev, void *v)
2843 + struct rpivid_dec_env *const de = v;
2844 + struct rpivid_ctx *const ctx = de->ctx;
2846 + xtrace_in(dev, de);
2848 + de->p1_status = check_status(dev);
2849 + if (de->p1_status != 0) {
2850 + v4l2_info(&dev->v4l2_dev, "%s: Post wait: %#x\n",
2851 + __func__, de->p1_status);
2853 + if (de->p1_status < 0)
2856 + /* Need to realloc - push onto a thread rather than IRQ */
2857 + rpivid_hw_irq_active1_thread(dev, &de->irq_ent,
2858 + phase1_thread, de);
2862 + /* After the frame-buf is detached it must be returned but from
2863 + * this point onward (phase2_claimed, cb_phase2) there are no error
2864 + * paths so the return at the end of cb_phase2 is all that is needed
2866 + de->frame_buf = v4l2_m2m_cap_buf_detach(dev->m2m_dev, ctx->fh.m2m_ctx);
2867 + if (!de->frame_buf) {
2868 + v4l2_err(&dev->v4l2_dev, "%s: No detached buffer\n", __func__);
2873 + (ctx->p2idx + 1 >= RPIVID_P2BUF_COUNT) ? 0 : ctx->p2idx + 1;
2875 + // Enable the next setup if our Q isn't too big
2876 + if (atomic_add_return(1, &ctx->p2out) < RPIVID_P2BUF_COUNT) {
2877 + xtrace_fin(dev, de);
2878 + v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
2879 + VB2_BUF_STATE_DONE);
2882 + rpivid_hw_irq_active2_claim(dev, &de->irq_ent, phase2_claimed, de);
2884 + xtrace_ok(dev, de);
2888 + dec_env_delete(de);
2889 + xtrace_fin(dev, de);
2890 + v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
2891 + VB2_BUF_STATE_ERROR);
2892 + xtrace_fail(dev, de);
2895 +static void phase1_claimed(struct rpivid_dev *const dev, void *v)
2897 + struct rpivid_dec_env *const de = v;
2898 + struct rpivid_ctx *const ctx = de->ctx;
2900 + const struct rpivid_gptr * const pu_gptr = ctx->pu_bufs + ctx->p2idx;
2901 + const struct rpivid_gptr * const coeff_gptr = ctx->coeff_bufs +
2904 + xtrace_in(dev, de);
2906 + de->pu_base_vc = pu_gptr->addr;
2908 + ALIGN_DOWN(pu_gptr->size / de->pic_height_in_ctbs_y, 64);
2910 + de->coeff_base_vc = coeff_gptr->addr;
2911 + de->coeff_stride =
2912 + ALIGN_DOWN(coeff_gptr->size / de->pic_height_in_ctbs_y, 64);
2914 + apb_write_vc_addr(dev, RPI_PUWBASE, de->pu_base_vc);
2915 + apb_write_vc_len(dev, RPI_PUWSTRIDE, de->pu_stride);
2916 + apb_write_vc_addr(dev, RPI_COEFFWBASE, de->coeff_base_vc);
2917 + apb_write_vc_len(dev, RPI_COEFFWSTRIDE, de->coeff_stride);
2919 + // Trigger command FIFO
2920 + apb_write(dev, RPI_CFNUM, de->cmd_len);
2923 + rpivid_hw_irq_active1_irq(dev, &de->irq_ent, cb_phase1, de);
2925 + // And start the h/w
2926 + apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd_copy_gptr->addr);
2928 + xtrace_ok(dev, de);
2931 +static void dec_state_delete(struct rpivid_ctx *const ctx)
2934 + struct rpivid_dec_state *const s = ctx->state;
2938 + ctx->state = NULL;
2942 + for (i = 0; i != HEVC_MAX_REFS; ++i)
2943 + aux_q_release(ctx, &s->ref_aux[i]);
2944 + aux_q_release(ctx, &s->frame_aux);
2949 +static void rpivid_h265_stop(struct rpivid_ctx *ctx)
2951 + struct rpivid_dev *const dev = ctx->dev;
2954 + v4l2_info(&dev->v4l2_dev, "%s\n", __func__);
2956 + dec_env_uninit(ctx);
2957 + dec_state_delete(ctx);
2959 + // dec_env & state must be killed before this to release the buffer to
2961 + aux_q_uninit(ctx);
2963 + for (i = 0; i != ARRAY_SIZE(ctx->bitbufs); ++i)
2964 + gptr_free(dev, ctx->bitbufs + i);
2965 + for (i = 0; i != ARRAY_SIZE(ctx->cmdbufs); ++i)
2966 + gptr_free(dev, ctx->cmdbufs + i);
2967 + for (i = 0; i != ARRAY_SIZE(ctx->pu_bufs); ++i)
2968 + gptr_free(dev, ctx->pu_bufs + i);
2969 + for (i = 0; i != ARRAY_SIZE(ctx->coeff_bufs); ++i)
2970 + gptr_free(dev, ctx->coeff_bufs + i);
2973 +static int rpivid_h265_start(struct rpivid_ctx *ctx)
2975 + struct rpivid_dev *const dev = ctx->dev;
2978 + unsigned int w = ctx->dst_fmt.width;
2979 + unsigned int h = ctx->dst_fmt.height;
2982 + size_t coeff_alloc;
2984 + // Generate a sanitised WxH for memory alloc
2985 + // Assume HD if unset
2996 + v4l2_info(&dev->v4l2_dev, "%s: (%dx%d)\n", __func__,
2997 + ctx->dst_fmt.width, ctx->dst_fmt.height);
3000 + ctx->state = kzalloc(sizeof(*ctx->state), GFP_KERNEL);
3001 + if (!ctx->state) {
3002 + v4l2_err(&dev->v4l2_dev, "Failed to allocate decode state\n");
3006 + if (dec_env_init(ctx) != 0) {
3007 + v4l2_err(&dev->v4l2_dev, "Failed to allocate decode envs\n");
3011 + // 16k is plenty for most purposes but we will realloc if needed
3012 + for (i = 0; i != ARRAY_SIZE(ctx->cmdbufs); ++i) {
3013 + if (gptr_alloc(dev, ctx->cmdbufs + i, 0x4000,
3014 + DMA_ATTR_FORCE_CONTIGUOUS))
3018 + // Finger in the air PU & Coeff alloc
3019 + // Will be realloced if too small
3020 + coeff_alloc = round_up_size(wxh);
3021 + pu_alloc = round_up_size(wxh / 4);
3022 + for (i = 0; i != ARRAY_SIZE(ctx->pu_bufs); ++i) {
3023 + // Don't actually need a kernel mapping here
3024 + if (gptr_alloc(dev, ctx->pu_bufs + i, pu_alloc,
3025 + DMA_ATTR_FORCE_CONTIGUOUS |
3026 + DMA_ATTR_NO_KERNEL_MAPPING))
3028 + if (gptr_alloc(dev, ctx->coeff_bufs + i, coeff_alloc,
3029 + DMA_ATTR_FORCE_CONTIGUOUS |
3030 + DMA_ATTR_NO_KERNEL_MAPPING))
3038 + rpivid_h265_stop(ctx);
3042 +static void rpivid_h265_trigger(struct rpivid_ctx *ctx)
3044 + struct rpivid_dev *const dev = ctx->dev;
3045 + struct rpivid_dec_env *const de = ctx->dec0;
3047 + xtrace_in(dev, de);
3049 + switch (!de ? RPIVID_DECODE_ERROR_CONTINUE : de->state) {
3050 + case RPIVID_DECODE_SLICE_START:
3051 + de->state = RPIVID_DECODE_SLICE_CONTINUE;
3053 + case RPIVID_DECODE_SLICE_CONTINUE:
3054 + v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
3055 + VB2_BUF_STATE_DONE);
3058 + v4l2_err(&dev->v4l2_dev, "%s: Unexpected state: %d\n", __func__,
3061 + case RPIVID_DECODE_ERROR_DONE:
3063 + dec_env_delete(de);
3065 + case RPIVID_DECODE_ERROR_CONTINUE:
3066 + xtrace_fin(dev, de);
3067 + v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
3068 + VB2_BUF_STATE_ERROR);
3070 + case RPIVID_DECODE_PHASE1:
3072 + rpivid_hw_irq_active1_claim(dev, &de->irq_ent, phase1_claimed,
3077 + xtrace_ok(dev, de);
3080 +struct rpivid_dec_ops rpivid_dec_ops_h265 = {
3081 + .setup = rpivid_h265_setup,
3082 + .start = rpivid_h265_start,
3083 + .stop = rpivid_h265_stop,
3084 + .trigger = rpivid_h265_trigger,
3087 +++ b/drivers/staging/media/rpivid/rpivid_hw.c
3089 +// SPDX-License-Identifier: GPL-2.0
3091 + * Raspberry Pi HEVC driver
3093 + * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
3095 + * Based on the Cedrus VPU driver, that is:
3097 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
3098 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
3099 + * Copyright (C) 2018 Bootlin
3101 +#include <linux/clk.h>
3102 +#include <linux/component.h>
3103 +#include <linux/dma-mapping.h>
3104 +#include <linux/interrupt.h>
3105 +#include <linux/io.h>
3106 +#include <linux/of_reserved_mem.h>
3107 +#include <linux/of_device.h>
3108 +#include <linux/of_platform.h>
3109 +#include <linux/platform_device.h>
3110 +#include <linux/regmap.h>
3111 +#include <linux/reset.h>
3113 +#include <media/videobuf2-core.h>
3114 +#include <media/v4l2-mem2mem.h>
3116 +#include "rpivid.h"
3117 +#include "rpivid_hw.h"
3119 +static void pre_irq(struct rpivid_dev *dev, struct rpivid_hw_irq_ent *ient,
3120 + rpivid_irq_callback cb, void *v,
3121 + struct rpivid_hw_irq_ctrl *ictl)
3123 + unsigned long flags;
3126 + v4l2_err(&dev->v4l2_dev, "Attempt to claim IRQ when already claimed\n");
3133 + // Not sure this lock is actually required
3134 + spin_lock_irqsave(&ictl->lock, flags);
3136 + spin_unlock_irqrestore(&ictl->lock, flags);
3139 +static void sched_claim(struct rpivid_dev * const dev,
3140 + struct rpivid_hw_irq_ctrl * const ictl)
3143 + struct rpivid_hw_irq_ent *ient = NULL;
3144 + unsigned long flags;
3146 + spin_lock_irqsave(&ictl->lock, flags);
3148 + if (--ictl->no_sched <= 0) {
3149 + ient = ictl->claim;
3150 + if (!ictl->irq && ient) {
3151 + ictl->claim = ient->next;
3152 + ictl->no_sched = 1;
3156 + spin_unlock_irqrestore(&ictl->lock, flags);
3161 + ient->cb(dev, ient->v);
3165 +/* Should only ever be called from its own IRQ cb so no lock required */
3166 +static void pre_thread(struct rpivid_dev *dev,
3167 + struct rpivid_hw_irq_ent *ient,
3168 + rpivid_irq_callback cb, void *v,
3169 + struct rpivid_hw_irq_ctrl *ictl)
3174 + ictl->thread_reqed = true;
3178 +// Called in irq context
3179 +static void do_irq(struct rpivid_dev * const dev,
3180 + struct rpivid_hw_irq_ctrl * const ictl)
3182 + struct rpivid_hw_irq_ent *ient;
3183 + unsigned long flags;
3185 + spin_lock_irqsave(&ictl->lock, flags);
3191 + spin_unlock_irqrestore(&ictl->lock, flags);
3194 + ient->cb(dev, ient->v);
3196 + sched_claim(dev, ictl);
3200 +static void do_claim(struct rpivid_dev * const dev,
3201 + struct rpivid_hw_irq_ent *ient,
3202 + const rpivid_irq_callback cb, void * const v,
3203 + struct rpivid_hw_irq_ctrl * const ictl)
3205 + unsigned long flags;
3207 + ient->next = NULL;
3211 + spin_lock_irqsave(&ictl->lock, flags);
3213 + if (ictl->claim) {
3214 + // If we have a Q then add to end
3215 + ictl->tail->next = ient;
3216 + ictl->tail = ient;
3218 + } else if (ictl->no_sched || ictl->irq) {
3219 + // Empty Q but other activity in progress so queue it
3220 + ictl->claim = ient;
3221 + ictl->tail = ient;
3224 + // Nothing else going on - schedule immediately and
3225 + // prevent anything else scheduling claims
3226 + ictl->no_sched = 1;
3229 + spin_unlock_irqrestore(&ictl->lock, flags);
3232 + ient->cb(dev, ient->v);
3234 + sched_claim(dev, ictl);
3238 +static void ictl_init(struct rpivid_hw_irq_ctrl * const ictl)
3240 + spin_lock_init(&ictl->lock);
3241 + ictl->claim = NULL;
3242 + ictl->tail = NULL;
3244 + ictl->no_sched = 0;
3247 +static void ictl_uninit(struct rpivid_hw_irq_ctrl * const ictl)
3252 +#if !OPT_DEBUG_POLL_IRQ
3253 +static irqreturn_t rpivid_irq_irq(int irq, void *data)
3255 + struct rpivid_dev * const dev = data;
3258 + ictrl = irq_read(dev, ARG_IC_ICTRL);
3259 + if (!(ictrl & ARG_IC_ICTRL_ALL_IRQ_MASK)) {
3260 + v4l2_warn(&dev->v4l2_dev, "IRQ but no IRQ bits set\n");
3264 + // Cancel any/all irqs
3265 + irq_write(dev, ARG_IC_ICTRL, ictrl & ~ARG_IC_ICTRL_SET_ZERO_MASK);
3267 + // Service Active2 before Active1 so Phase 1 can transition to Phase 2
3269 + if (ictrl & ARG_IC_ICTRL_ACTIVE2_INT_SET)
3270 + do_irq(dev, &dev->ic_active2);
3271 + if (ictrl & ARG_IC_ICTRL_ACTIVE1_INT_SET)
3272 + do_irq(dev, &dev->ic_active1);
3274 + return dev->ic_active1.thread_reqed || dev->ic_active2.thread_reqed ?
3275 + IRQ_WAKE_THREAD : IRQ_HANDLED;
3278 +static void do_thread(struct rpivid_dev * const dev,
3279 + struct rpivid_hw_irq_ctrl *const ictl)
3281 + unsigned long flags;
3282 + struct rpivid_hw_irq_ent *ient = NULL;
3284 + spin_lock_irqsave(&ictl->lock, flags);
3286 + if (ictl->thread_reqed) {
3288 + ictl->thread_reqed = false;
3292 + spin_unlock_irqrestore(&ictl->lock, flags);
3295 + ient->cb(dev, ient->v);
3297 + sched_claim(dev, ictl);
3301 +static irqreturn_t rpivid_irq_thread(int irq, void *data)
3303 + struct rpivid_dev * const dev = data;
3305 + do_thread(dev, &dev->ic_active1);
3306 + do_thread(dev, &dev->ic_active2);
3308 + return IRQ_HANDLED;
3312 +/* May only be called from Active1 CB
3313 + * IRQs should not be expected until execution continues in the cb
3315 +void rpivid_hw_irq_active1_thread(struct rpivid_dev *dev,
3316 + struct rpivid_hw_irq_ent *ient,
3317 + rpivid_irq_callback thread_cb, void *ctx)
3319 + pre_thread(dev, ient, thread_cb, ctx, &dev->ic_active1);
3322 +void rpivid_hw_irq_active1_claim(struct rpivid_dev *dev,
3323 + struct rpivid_hw_irq_ent *ient,
3324 + rpivid_irq_callback ready_cb, void *ctx)
3326 + do_claim(dev, ient, ready_cb, ctx, &dev->ic_active1);
3329 +void rpivid_hw_irq_active1_irq(struct rpivid_dev *dev,
3330 + struct rpivid_hw_irq_ent *ient,
3331 + rpivid_irq_callback irq_cb, void *ctx)
3333 + pre_irq(dev, ient, irq_cb, ctx, &dev->ic_active1);
3336 +void rpivid_hw_irq_active2_claim(struct rpivid_dev *dev,
3337 + struct rpivid_hw_irq_ent *ient,
3338 + rpivid_irq_callback ready_cb, void *ctx)
3340 + do_claim(dev, ient, ready_cb, ctx, &dev->ic_active2);
3343 +void rpivid_hw_irq_active2_irq(struct rpivid_dev *dev,
3344 + struct rpivid_hw_irq_ent *ient,
3345 + rpivid_irq_callback irq_cb, void *ctx)
3347 + pre_irq(dev, ient, irq_cb, ctx, &dev->ic_active2);
3350 +int rpivid_hw_probe(struct rpivid_dev *dev)
3352 + struct resource *res;
3357 + ictl_init(&dev->ic_active1);
3358 + ictl_init(&dev->ic_active2);
3360 + res = platform_get_resource_byname(dev->pdev, IORESOURCE_MEM, "intc");
3364 + dev->base_irq = devm_ioremap(dev->dev, res->start, resource_size(res));
3365 + if (IS_ERR(dev->base_irq))
3366 + return PTR_ERR(dev->base_irq);
3368 + res = platform_get_resource_byname(dev->pdev, IORESOURCE_MEM, "hevc");
3372 + dev->base_h265 = devm_ioremap(dev->dev, res->start, resource_size(res));
3373 + if (IS_ERR(dev->base_h265))
3374 + return PTR_ERR(dev->base_h265);
3376 + dev->clock = devm_clk_get(&dev->pdev->dev, "hevc");
3377 + if (IS_ERR(dev->clock))
3378 + return PTR_ERR(dev->clock);
3380 + // Disable IRQs & reset anything pending
3382 + ARG_IC_ICTRL_ACTIVE1_EN_SET | ARG_IC_ICTRL_ACTIVE2_EN_SET);
3383 + irq_stat = irq_read(dev, 0);
3384 + irq_write(dev, 0, irq_stat);
3386 +#if !OPT_DEBUG_POLL_IRQ
3387 + irq_dec = platform_get_irq(dev->pdev, 0);
3390 + ret = devm_request_threaded_irq(dev->dev, irq_dec,
3392 + rpivid_irq_thread,
3393 + 0, dev_name(dev->dev), dev);
3395 + dev_err(dev->dev, "Failed to request IRQ - %d\n", ret);
3403 +void rpivid_hw_remove(struct rpivid_dev *dev)
3405 + // IRQ auto freed on unload so no need to do it here
3406 + ictl_uninit(&dev->ic_active1);
3407 + ictl_uninit(&dev->ic_active2);
3411 +++ b/drivers/staging/media/rpivid/rpivid_hw.h
3413 +/* SPDX-License-Identifier: GPL-2.0 */
3415 + * Raspberry Pi HEVC driver
3417 + * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
3419 + * Based on the Cedrus VPU driver, that is:
3421 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
3422 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
3423 + * Copyright (C) 2018 Bootlin
3426 +#ifndef _RPIVID_HW_H_
3427 +#define _RPIVID_HW_H_
3429 +struct rpivid_hw_irq_ent {
3430 + struct rpivid_hw_irq_ent *next;
3431 + rpivid_irq_callback cb;
3435 +/* Phase 1 Register offsets */
3440 +#define RPI_SLICE 12
3441 +#define RPI_TILESTART 16
3442 +#define RPI_TILEEND 20
3443 +#define RPI_SLICESTART 24
3444 +#define RPI_MODE 28
3445 +#define RPI_LEFT0 32
3446 +#define RPI_LEFT1 36
3447 +#define RPI_LEFT2 40
3448 +#define RPI_LEFT3 44
3450 +#define RPI_CONTROL 52
3451 +#define RPI_STATUS 56
3452 +#define RPI_VERSION 60
3453 +#define RPI_BFBASE 64
3454 +#define RPI_BFNUM 68
3455 +#define RPI_BFCONTROL 72
3456 +#define RPI_BFSTATUS 76
3457 +#define RPI_PUWBASE 80
3458 +#define RPI_PUWSTRIDE 84
3459 +#define RPI_COEFFWBASE 88
3460 +#define RPI_COEFFWSTRIDE 92
3461 +#define RPI_SLICECMDS 96
3462 +#define RPI_BEGINTILEEND 100
3463 +#define RPI_TRANSFER 104
3464 +#define RPI_CFBASE 108
3465 +#define RPI_CFNUM 112
3466 +#define RPI_CFSTATUS 116
3468 +/* Phase 2 Register offsets */
3470 +#define RPI_PURBASE 0x8000
3471 +#define RPI_PURSTRIDE 0x8004
3472 +#define RPI_COEFFRBASE 0x8008
3473 +#define RPI_COEFFRSTRIDE 0x800C
3474 +#define RPI_NUMROWS 0x8010
3475 +#define RPI_CONFIG2 0x8014
3476 +#define RPI_OUTYBASE 0x8018
3477 +#define RPI_OUTYSTRIDE 0x801C
3478 +#define RPI_OUTCBASE 0x8020
3479 +#define RPI_OUTCSTRIDE 0x8024
3480 +#define RPI_STATUS2 0x8028
3481 +#define RPI_FRAMESIZE 0x802C
3482 +#define RPI_MVBASE 0x8030
3483 +#define RPI_MVSTRIDE 0x8034
3484 +#define RPI_COLBASE 0x8038
3485 +#define RPI_COLSTRIDE 0x803C
3486 +#define RPI_CURRPOC 0x8040
3489 + * Write a general register value
3490 + * Order is unimportant
3492 +static inline void apb_write(const struct rpivid_dev * const dev,
3493 + const unsigned int offset, const u32 val)
3495 + writel_relaxed(val, dev->base_h265 + offset);
3498 +/* Write the final register value that actually starts the phase */
3499 +static inline void apb_write_final(const struct rpivid_dev * const dev,
3500 + const unsigned int offset, const u32 val)
3502 + writel(val, dev->base_h265 + offset);
3505 +static inline u32 apb_read(const struct rpivid_dev * const dev,
3506 + const unsigned int offset)
3508 + return readl(dev->base_h265 + offset);
3511 +static inline void irq_write(const struct rpivid_dev * const dev,
3512 + const unsigned int offset, const u32 val)
3514 + writel(val, dev->base_irq + offset);
3517 +static inline u32 irq_read(const struct rpivid_dev * const dev,
3518 + const unsigned int offset)
3520 + return readl(dev->base_irq + offset);
3523 +static inline void apb_write_vc_addr(const struct rpivid_dev * const dev,
3524 + const unsigned int offset,
3525 + const dma_addr_t a)
3527 + apb_write(dev, offset, (u32)(a >> 6));
3530 +static inline void apb_write_vc_addr_final(const struct rpivid_dev * const dev,
3531 + const unsigned int offset,
3532 + const dma_addr_t a)
3534 + apb_write_final(dev, offset, (u32)(a >> 6));
3537 +static inline void apb_write_vc_len(const struct rpivid_dev * const dev,
3538 + const unsigned int offset,
3539 + const unsigned int x)
3541 + apb_write(dev, offset, (x + 63) >> 6);
3544 +/* *ARG_IC_ICTRL - Interrupt control for ARGON Core*
3545 + * Offset (byte space) = 40'h2b10000
3546 + * Physical Address (byte space) = 40'h7eb10000
3547 + * Verilog Macro Address = `ARG_IC_REG_START + `ARGON_INTCTRL_ICTRL
3548 + * Reset Value = 32'b100x100x_100xxxxx_xxxxxxx0_x100x100
3549 + * Access = RW (32-bit only)
3550 + * Interrupt control logic for ARGON Core.
3552 +#define ARG_IC_ICTRL 0
3554 +/* acc=LWC ACTIVE1_INT FIELD ACCESS: LWC
3557 + * This is set and held when an hevc_active1 interrupt edge is detected
3558 + * The polarity of the edge is set by the ACTIVE1_EDGE field
3559 + * Write a 1 to this bit to clear down the latched interrupt
3560 + * The latched interrupt is only enabled out onto the interrupt line if
3561 + * ACTIVE1_EN is set
3562 + * Reset value is *0* decimal.
3564 +#define ARG_IC_ICTRL_ACTIVE1_INT_SET BIT(0)
3566 +/* ACTIVE1_EDGE Sets the polarity of the interrupt edge detection logic
3567 + * This logic detects edges of the hevc_active1 line from the argon core
3568 + * 0 = negedge, 1 = posedge
3569 + * Reset value is *0* decimal.
3571 +#define ARG_IC_ICTRL_ACTIVE1_EDGE_SET BIT(1)
3573 +/* ACTIVE1_EN Enables ACTIVE1_INT out onto the argon interrupt line.
3574 + * If this isn't set, the interrupt logic will work but no interrupt will be
3575 + * sent to the interrupt controller
3576 + * Reset value is *1* decimal.
3578 + * [JC] The above appears to be a lie - if unset then b0 is never set
3580 +#define ARG_IC_ICTRL_ACTIVE1_EN_SET BIT(2)
3582 +/* acc=RO ACTIVE1_STATUS FIELD ACCESS: RO
3584 + * The current status of the hevc_active1 signal
3586 +#define ARG_IC_ICTRL_ACTIVE1_STATUS_SET BIT(3)
3588 +/* acc=LWC ACTIVE2_INT FIELD ACCESS: LWC
3591 + * This is set and held when an hevc_active2 interrupt edge is detected
3592 + * The polarity of the edge is set by the ACTIVE2_EDGE field
3593 + * Write a 1 to this bit to clear down the latched interrupt
3594 + * The latched interrupt is only enabled out onto the interrupt line if
3595 + * ACTIVE2_EN is set
3596 + * Reset value is *0* decimal.
3598 +#define ARG_IC_ICTRL_ACTIVE2_INT_SET BIT(4)
3600 +/* ACTIVE2_EDGE Sets the polarity of the interrupt edge detection logic
3601 + * This logic detects edges of the hevc_active2 line from the argon core
3602 + * 0 = negedge, 1 = posedge
3603 + * Reset value is *0* decimal.
3605 +#define ARG_IC_ICTRL_ACTIVE2_EDGE_SET BIT(5)
3607 +/* ACTIVE2_EN Enables ACTIVE2_INT out onto the argon interrupt line.
3608 + * If this isn't set, the interrupt logic will work but no interrupt will be
3609 + * sent to the interrupt controller
3610 + * Reset value is *1* decimal.
3612 +#define ARG_IC_ICTRL_ACTIVE2_EN_SET BIT(6)
3614 +/* acc=RO ACTIVE2_STATUS FIELD ACCESS: RO
3616 + * The current status of the hevc_active2 signal
3618 +#define ARG_IC_ICTRL_ACTIVE2_STATUS_SET BIT(7)
3620 +/* TEST_INT Forces the argon int high for test purposes.
3621 + * Reset value is *0* decimal.
3623 +#define ARG_IC_ICTRL_TEST_INT BIT(8)
3624 +#define ARG_IC_ICTRL_SPARE BIT(9)
3626 +/* acc=RO VP9_INTERRUPT_STATUS FIELD ACCESS: RO
3628 + * The current status of the vp9_interrupt signal
3630 +#define ARG_IC_ICTRL_VP9_INTERRUPT_STATUS BIT(10)
3632 +/* AIO_INT_ENABLE 1 = Or the AIO int in with the Argon int so the VPU can see
3634 + * 0 = the AIO int is masked. (It should still be connected to the GIC though).
3636 +#define ARG_IC_ICTRL_AIO_INT_ENABLE BIT(20)
/* NOTE(review): the H264_ACTIVE_* and H264_INTERRUPT_* bits below have no
 * field description in this header. Presumably INT/EDGE/EN/STATUS follow the
 * same scheme as the ACTIVE2 and VP9_INTERRUPT fields of the same names -
 * confirm against the hardware documentation.
 */
3637 +#define ARG_IC_ICTRL_H264_ACTIVE_INT BIT(21)
3638 +#define ARG_IC_ICTRL_H264_ACTIVE_EDGE BIT(22)
3639 +#define ARG_IC_ICTRL_H264_ACTIVE_EN BIT(23)
3640 +#define ARG_IC_ICTRL_H264_ACTIVE_STATUS BIT(24)
3641 +#define ARG_IC_ICTRL_H264_INTERRUPT_INT BIT(25)
3642 +#define ARG_IC_ICTRL_H264_INTERRUPT_EDGE BIT(26)
3643 +#define ARG_IC_ICTRL_H264_INTERRUPT_EN BIT(27)
3645 +/* acc=RO H264_INTERRUPT_STATUS FIELD ACCESS: RO
3647 + * The current status of the h264_interrupt signal
3649 +#define ARG_IC_ICTRL_H264_INTERRUPT_STATUS BIT(28)
3651 +/* acc=LWC VP9_INTERRUPT_INT FIELD ACCESS: LWC
3654 + * This is set and held when a vp9_interrupt interrupt edge is detected
3655 + * The polarity of the edge is set by the VP9_INTERRUPT_EDGE field
3656 + * Write a 1 to this bit to clear down the latched interrupt
3657 + * The latched interrupt is only enabled out onto the interrupt line if
3658 + * VP9_INTERRUPT_EN is set
3659 + * Reset value is *0* decimal.
3661 +#define ARG_IC_ICTRL_VP9_INTERRUPT_INT BIT(29)
3663 +/* VP9_INTERRUPT_EDGE Sets the polarity of the interrupt edge detection logic
3664 + * This logic detects edges of the vp9_interrupt line from the argon h264 core
3665 + * 0 = negedge, 1 = posedge
3666 + * Reset value is *0* decimal.
3668 +#define ARG_IC_ICTRL_VP9_INTERRUPT_EDGE BIT(30)
3670 +/* VP9_INTERRUPT_EN Enables VP9_INTERRUPT_INT out onto the argon interrupt line.
3671 + * If this isn't set, the interrupt logic will work but no interrupt will be
3672 + * sent to the interrupt controller
3673 + * Reset value is *1* decimal.
3675 +#define ARG_IC_ICTRL_VP9_INTERRUPT_EN BIT(31)
3677 +/* Bits 19:12, 11 reserved - read ?, write 0 */
3678 +#define ARG_IC_ICTRL_SET_ZERO_MASK ((0xff << 12) | BIT(11))
/* Union of the four *_INT flag bits (VP9, H264, ACTIVE1, ACTIVE2) of ICTRL */
3681 +#define ARG_IC_ICTRL_ALL_IRQ_MASK (\
3682 + ARG_IC_ICTRL_VP9_INTERRUPT_INT |\
3683 + ARG_IC_ICTRL_H264_INTERRUPT_INT |\
3684 + ARG_IC_ICTRL_ACTIVE1_INT_SET |\
3685 + ARG_IC_ICTRL_ACTIVE2_INT_SET)
/* Interrupt claim / callback registration for the ACTIVE1 line.
 * Each call takes the device, a caller-supplied rpivid_hw_irq_ent, a callback
 * and the context pointer handed to that callback.
 * The per-function comments below state where each call may be made from.
 */
3687 +/* Auto release once all CBs called */
3688 +void rpivid_hw_irq_active1_claim(struct rpivid_dev *dev,
3689 + struct rpivid_hw_irq_ent *ient,
3690 + rpivid_irq_callback ready_cb, void *ctx);
3691 +/* May only be called in claim cb */
3692 +void rpivid_hw_irq_active1_irq(struct rpivid_dev *dev,
3693 + struct rpivid_hw_irq_ent *ient,
3694 + rpivid_irq_callback irq_cb, void *ctx);
3695 +/* May only be called in irq cb */
3696 +void rpivid_hw_irq_active1_thread(struct rpivid_dev *dev,
3697 + struct rpivid_hw_irq_ent *ient,
3698 + rpivid_irq_callback thread_cb, void *ctx);
/* Same scheme for the ACTIVE2 line; no thread-callback variant is declared */
3700 +/* Auto release once all CBs called */
3701 +void rpivid_hw_irq_active2_claim(struct rpivid_dev *dev,
3702 + struct rpivid_hw_irq_ent *ient,
3703 + rpivid_irq_callback ready_cb, void *ctx);
3704 +/* May only be called in claim cb */
3705 +void rpivid_hw_irq_active2_irq(struct rpivid_dev *dev,
3706 + struct rpivid_hw_irq_ent *ient,
3707 + rpivid_irq_callback irq_cb, void *ctx);
/* Hardware-layer setup / teardown for one device instance.
 * NOTE(review): definitions are not visible here (presumably rpivid_hw.c).
 */
3709 +int rpivid_hw_probe(struct rpivid_dev *dev);
3710 +void rpivid_hw_remove(struct rpivid_dev *dev);
3714 +++ b/drivers/staging/media/rpivid/rpivid_video.c
3716 +// SPDX-License-Identifier: GPL-2.0
3718 + * Raspberry Pi HEVC driver
3720 + * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
3722 + * Based on the Cedrus VPU driver, that is:
3724 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
3725 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
3726 + * Copyright (C) 2018 Bootlin
3729 +#include <media/videobuf2-dma-contig.h>
3730 +#include <media/v4l2-device.h>
3731 +#include <media/v4l2-ioctl.h>
3732 +#include <media/v4l2-event.h>
3733 +#include <media/v4l2-mem2mem.h>
3735 +#include "rpivid.h"
3736 +#include "rpivid_video.h"
3737 +#include "rpivid_dec.h"
/* Bit flags naming the source and destination sides of a decode.
 * NOTE(review): not referenced in the part of this file visible here.
 */
3739 +#define RPIVID_DECODE_SRC BIT(0)
3740 +#define RPIVID_DECODE_DST BIT(1)
/* Picture dimension limits (unsigned so they can be used with min()) */
3742 +#define RPIVID_MIN_WIDTH 16U
3743 +#define RPIVID_MIN_HEIGHT 16U
3744 +#define RPIVID_MAX_WIDTH 4096U
3745 +#define RPIVID_MAX_HEIGHT 4096U
/* Map an open file to its driver context: file->private_data is taken to be
 * the address of the fh member embedded in struct rpivid_ctx.
 */
3747 +static inline struct rpivid_ctx *rpivid_file2ctx(struct file *file)
3749 + return container_of(file->private_data, struct rpivid_ctx, fh);
/*
 * Accept a caller-suggested value only if it is plausible.
 *
 * Returns x unchanged when y <= x <= 2 * y, otherwise returns y.
 * Note that an over-large x falls back to y (the minimum), not to 2 * y,
 * so this is a "use the suggestion or the default" helper rather than a clamp.
 */
static inline unsigned int constrain2x(unsigned int x, unsigned int y)
{
	if (x < y)
		return y;

	/*
	 * x >= y here, so x - y cannot wrap. Comparing the difference avoids
	 * computing y * 2, which wraps for y > UINT_MAX / 2 and would reject
	 * an x that is actually within range.
	 */
	return (x - y > y) ? y : x;
}
/* Normalise a source (bitstream) pixel format in place.
 * The pixelformat is checked against V4L2_PIX_FMT_HEVC_SLICE; bytesperline is
 * forced to 0, sizeimage is raised to at least SZ_1K and field is set to
 * V4L2_FIELD_NONE.
 * NOTE(review): the return statements are elided in this extract; the caller
 * rpivid_try_fmt_vid_out() treats a non-zero return as "format rejected".
 */
3760 +int rpivid_prepare_src_format(struct v4l2_pix_format *pix_fmt)
3762 + if (pix_fmt->pixelformat != V4L2_PIX_FMT_HEVC_SLICE)
3765 + /* Zero bytes per line for encoded source. */
3766 + pix_fmt->bytesperline = 0;
3767 + /* Choose some minimum size since this can't be 0 */
3768 + pix_fmt->sizeimage = max_t(u32, SZ_1K, pix_fmt->sizeimage);
3769 + pix_fmt->field = V4L2_FIELD_NONE;
3773 +int rpivid_prepare_dst_format(struct v4l2_pix_format *pix_fmt)
3775 + unsigned int width = pix_fmt->width;
3776 + unsigned int height = pix_fmt->height;
3777 + unsigned int sizeimage = pix_fmt->sizeimage;
3778 + unsigned int bytesperline = pix_fmt->bytesperline;
3780 + switch (pix_fmt->pixelformat) {
3781 + /* For column formats set bytesperline to column height (stride2) */
3782 + case V4L2_PIX_FMT_NV12_COL128:
3783 + /* Width rounds up to columns */
3784 + width = ALIGN(min(width, RPIVID_MAX_WIDTH), 128);
3786 + /* 16 aligned height - not sure we even need that */
3787 + height = ALIGN(height, 16);
3789 + * Accept suggested shape if at least min & < 2 * min
3791 + bytesperline = constrain2x(bytesperline, height * 3 / 2);
3794 + * Again allow plausible variation in case added padding is
3797 + sizeimage = constrain2x(sizeimage, bytesperline * width);
3800 + case V4L2_PIX_FMT_NV12_10_COL128:
3801 + /* width in pixels (3 pels = 4 bytes) rounded to 128 byte
3804 + width = ALIGN(((min(width, RPIVID_MAX_WIDTH) + 2) / 3), 32) * 3;
3806 + /* 16-aligned height. */
3807 + height = ALIGN(height, 16);
3810 + * Accept suggested shape if at least min & < 2 * min
3812 + bytesperline = constrain2x(bytesperline, height * 3 / 2);
3815 + * Again allow plausible variation in case added padding is
3818 + sizeimage = constrain2x(sizeimage,
3819 + bytesperline * width * 4 / 3);
3826 + pix_fmt->width = width;
3827 + pix_fmt->height = height;
3829 + pix_fmt->field = V4L2_FIELD_NONE;
3830 + pix_fmt->bytesperline = bytesperline;
3831 + pix_fmt->sizeimage = sizeimage;
/* VIDIOC_QUERYCAP: fill in the driver, card and bus_info strings, all derived
 * from RPIVID_NAME.
 */
3835 +static int rpivid_querycap(struct file *file, void *priv,
3836 + struct v4l2_capability *cap)
3838 + strscpy(cap->driver, RPIVID_NAME, sizeof(cap->driver));
3839 + strscpy(cap->card, RPIVID_NAME, sizeof(cap->card));
3840 + snprintf(cap->bus_info, sizeof(cap->bus_info),
3841 + "platform:%s", RPIVID_NAME);
/* Enumerate source formats: index 0 reports V4L2_PIX_FMT_HEVC_SLICE.
 * NOTE(review): the handling of other indices is elided in this extract.
 */
3846 +static int rpivid_enum_fmt_vid_out(struct file *file, void *priv,
3847 + struct v4l2_fmtdesc *f)
3851 + // H.265 Slice only currently
3852 + if (f->index == 0) {
3853 + f->pixelformat = V4L2_PIX_FMT_HEVC_SLICE;
/* Check an SPS against this driver's limits and against H.265 value ranges.
 * Derived log2 sizes (CTB, min/max transform block) are computed first and
 * then used in the range checks.
 * pixelformat_from_sps() treats a non-zero return as "SPS usable".
 * NOTE(review): the return statements themselves are elided in this extract.
 */
3860 +static int rpivid_hevc_validate_sps(const struct v4l2_ctrl_hevc_sps * const sps)
3862 + const unsigned int ctb_log2_size_y =
3863 + sps->log2_min_luma_coding_block_size_minus3 + 3 +
3864 + sps->log2_diff_max_min_luma_coding_block_size;
3865 + const unsigned int min_tb_log2_size_y =
3866 + sps->log2_min_luma_transform_block_size_minus2 + 2;
3867 + const unsigned int max_tb_log2_size_y = min_tb_log2_size_y +
3868 + sps->log2_diff_max_min_luma_transform_block_size;
3870 + /* Local limitations */
3871 + if (sps->pic_width_in_luma_samples < 32 ||
3872 + sps->pic_width_in_luma_samples > 4096)
3874 + if (sps->pic_height_in_luma_samples < 32 ||
3875 + sps->pic_height_in_luma_samples > 4096)
3877 + if (!(sps->bit_depth_luma_minus8 == 0 ||
3878 + sps->bit_depth_luma_minus8 == 2))
3880 + if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
3882 + if (sps->chroma_format_idc != 1)
3885 + /* Limits from H.265 7.4.3.2.1 */
3886 + if (sps->log2_max_pic_order_cnt_lsb_minus4 > 12)
3888 + if (sps->sps_max_dec_pic_buffering_minus1 > 15)
3890 + if (sps->sps_max_num_reorder_pics >
3891 + sps->sps_max_dec_pic_buffering_minus1)
3893 + if (ctb_log2_size_y > 6)
3895 + if (max_tb_log2_size_y > 5)
3897 + if (max_tb_log2_size_y > ctb_log2_size_y)
3899 + if (sps->max_transform_hierarchy_depth_inter >
3900 + (ctb_log2_size_y - min_tb_log2_size_y))
3902 + if (sps->max_transform_hierarchy_depth_intra >
3903 + (ctb_log2_size_y - min_tb_log2_size_y))
3905 + /* Check pcm stuff */
/* NOTE(review): no PCM field is checked in the visible lines; the two checks
 * that follow bound the short-term and long-term reference picture counts.
 */
3906 + if (sps->num_short_term_ref_pic_sets > 64)
3908 + if (sps->num_long_term_ref_pics_sps > 32)
/* An SPS counts as "set" when the pointer is non-NULL and its luma width is
 * non-zero.
 */
3913 +static inline int is_sps_set(const struct v4l2_ctrl_hevc_sps * const sps)
3915 + return sps && sps->pic_width_in_luma_samples != 0;
/* Return the index'th destination pixel format usable with the given SPS.
 * With no SPS set, both column formats are offered (NV12_10_COL128 at
 * index 1). With a validated SPS only index 0 yields a format, selected by
 * bit depth: 8-bit -> NV12_COL128, 10-bit -> NV12_10_COL128.
 * rpivid_try_fmt_vid_cap() iterates until this returns 0, i.e. 0 means
 * "no format at this index".
 * NOTE(review): the second parameter line, the pf declaration and the final
 * return are elided in this extract.
 */
3918 +static u32 pixelformat_from_sps(const struct v4l2_ctrl_hevc_sps * const sps,
3923 + // Use width 0 as a signifier of unsetness
3924 + if (!is_sps_set(sps)) {
3925 + /* Treat this as an error? For now return both */
3927 + pf = V4L2_PIX_FMT_NV12_COL128;
3928 + else if (index == 1)
3929 + pf = V4L2_PIX_FMT_NV12_10_COL128;
3930 + } else if (index == 0 && rpivid_hevc_validate_sps(sps)) {
3931 + if (sps->bit_depth_luma_minus8 == 0)
3932 + pf = V4L2_PIX_FMT_NV12_COL128;
3933 + else if (sps->bit_depth_luma_minus8 == 2)
3934 + pf = V4L2_PIX_FMT_NV12_10_COL128;
3940 +static struct v4l2_pix_format
3941 +rpivid_hevc_default_dst_fmt(struct rpivid_ctx * const ctx)
3943 + const struct v4l2_ctrl_hevc_sps * const sps =
3944 + rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS);
3945 + struct v4l2_pix_format pix_fmt = {
3946 + .width = sps->pic_width_in_luma_samples,
3947 + .height = sps->pic_height_in_luma_samples,
3948 + .pixelformat = pixelformat_from_sps(sps, 0)
3951 + rpivid_prepare_dst_format(&pix_fmt);
/* Look up the context's current SPS control and return the index'th
 * destination pixel format for it (see pixelformat_from_sps()).
 */
3955 +static u32 rpivid_hevc_get_dst_pixelformat(struct rpivid_ctx * const ctx,
3958 + const struct v4l2_ctrl_hevc_sps * const sps =
3959 + rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS);
3961 + return pixelformat_from_sps(sps, index);
/* Enumerate destination formats: report the pixel format that the current SPS
 * allows at f->index.
 * NOTE(review): the check for "no format at this index" is elided here.
 */
3964 +static int rpivid_enum_fmt_vid_cap(struct file *file, void *priv,
3965 + struct v4l2_fmtdesc *f)
3967 + struct rpivid_ctx * const ctx = rpivid_file2ctx(file);
3969 + const u32 pf = rpivid_hevc_get_dst_pixelformat(ctx, f->index);
3974 + f->pixelformat = pf;
/* Get the destination format. If userspace has not set one explicitly
 * (dst_fmt_set is clear) the stored format is first regenerated from the
 * current SPS, replacing the whole of ctx->dst_fmt.
 */
3978 +static int rpivid_g_fmt_vid_cap(struct file *file, void *priv,
3979 + struct v4l2_format *f)
3981 + struct rpivid_ctx *ctx = rpivid_file2ctx(file);
3983 + if (!ctx->dst_fmt_set)
3984 + ctx->dst_fmt = rpivid_hevc_default_dst_fmt(ctx);
3985 + f->fmt.pix = ctx->dst_fmt;
/* Get the source format: copies the context's stored src_fmt to the caller */
3989 +static int rpivid_g_fmt_vid_out(struct file *file, void *priv,
3990 + struct v4l2_format *f)
3992 + struct rpivid_ctx *ctx = rpivid_file2ctx(file);
3994 + f->fmt.pix = ctx->src_fmt;
/* Copy the four colour-description fields (colorspace, transfer function,
 * YCbCr encoding, quantization) from s to d; nothing else in d is touched.
 */
3998 +static inline void copy_color(struct v4l2_pix_format *d,
3999 + const struct v4l2_pix_format *s)
4001 + d->colorspace = s->colorspace;
4002 + d->xfer_func = s->xfer_func;
4003 + d->ycbcr_enc = s->ycbcr_enc;
4004 + d->quantization = s->quantization;
/* Try a destination format.
 * The requested pixelformat is searched for among the formats the current SPS
 * allows; if it is not found the first allowed format is substituted. Colour
 * fields left at V4L2_COLORSPACE_DEFAULT are filled from the source format.
 * The format is then normalised by rpivid_prepare_dst_format(), whose return
 * value is returned.
 * NOTE(review): the error returns and the loop's break are elided here.
 */
4007 +static int rpivid_try_fmt_vid_cap(struct file *file, void *priv,
4008 + struct v4l2_format *f)
4010 + struct rpivid_ctx *ctx = rpivid_file2ctx(file);
4011 + const struct v4l2_ctrl_hevc_sps * const sps =
4012 + rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS);
4016 + /* Reject format types we don't support */
4017 + if (f->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
4020 + for (i = 0; (pixelformat = pixelformat_from_sps(sps, i)) != 0; i++) {
4021 + if (f->fmt.pix.pixelformat == pixelformat)
4025 + // If we can't use requested fmt then set to default
4026 + if (pixelformat == 0) {
4027 + pixelformat = pixelformat_from_sps(sps, 0);
4028 + // If we don't have a default then give up
4029 + if (pixelformat == 0)
4033 + // We don't have any way of finding out colourspace so believe
4034 + // anything we are told - take anything set in src as a default
4035 + if (f->fmt.pix.colorspace == V4L2_COLORSPACE_DEFAULT)
4036 + copy_color(&f->fmt.pix, &ctx->src_fmt);
4038 + f->fmt.pix.pixelformat = pixelformat;
4039 + return rpivid_prepare_dst_format(&f->fmt.pix);
/* Try a source format. If rpivid_prepare_src_format() rejects the request,
 * the pixelformat is replaced by RPIVID_SRC_PIXELFORMAT_DEFAULT and the
 * format is prepared again.
 */
4042 +static int rpivid_try_fmt_vid_out(struct file *file, void *priv,
4043 + struct v4l2_format *f)
4045 + if (f->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
4048 + if (rpivid_prepare_src_format(&f->fmt.pix)) {
4049 + // Set default src format
4050 + f->fmt.pix.pixelformat = RPIVID_SRC_PIXELFORMAT_DEFAULT;
4051 + rpivid_prepare_src_format(&f->fmt.pix);
/* Set the destination format. The queue's busy state is checked first, the
 * request is run through rpivid_try_fmt_vid_cap(), and the result is stored
 * in the context with dst_fmt_set raised so that g_fmt no longer regenerates
 * a default.
 * NOTE(review): the error returns are elided in this extract.
 */
4056 +static int rpivid_s_fmt_vid_cap(struct file *file, void *priv,
4057 + struct v4l2_format *f)
4059 + struct rpivid_ctx *ctx = rpivid_file2ctx(file);
4060 + struct vb2_queue *vq;
4063 + vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
4064 + if (vb2_is_busy(vq))
4067 + ret = rpivid_try_fmt_vid_cap(file, priv, f);
4071 + ctx->dst_fmt = f->fmt.pix;
4072 + ctx->dst_fmt_set = 1;
/* Set the source format. The queue's busy state is checked first, the request
 * is run through rpivid_try_fmt_vid_out(), and the result is stored in the
 * context. Setting the source clears dst_fmt_set, flags the queue as
 * supporting held capture buffers, and copies the colour fields into the
 * stored destination format.
 * NOTE(review): because dst_fmt_set is cleared here, rpivid_g_fmt_vid_cap()
 * will overwrite the whole of ctx->dst_fmt with a fresh default, which looks
 * like it discards the colour information propagated below - confirm.
 */
4077 +static int rpivid_s_fmt_vid_out(struct file *file, void *priv,
4078 + struct v4l2_format *f)
4080 + struct rpivid_ctx *ctx = rpivid_file2ctx(file);
4081 + struct vb2_queue *vq;
4084 + vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
4085 + if (vb2_is_busy(vq))
4088 + ret = rpivid_try_fmt_vid_out(file, priv, f);
4092 + ctx->src_fmt = f->fmt.pix;
4093 + ctx->dst_fmt_set = 0; // Setting src invalidates dst
4095 + vq->subsystem_flags |=
4096 + VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF;
4098 + /* Propagate colorspace information to capture. */
4099 + copy_color(&ctx->dst_fmt, &f->fmt.pix);
/* ioctl dispatch table. Capability and format ioctls use the handlers defined
 * in this file; buffer, streaming and decoder-command ioctls use the generic
 * v4l2_m2m helpers; event (un)subscription uses the v4l2 control/event
 * helpers.
 */
4103 +const struct v4l2_ioctl_ops rpivid_ioctl_ops = {
4104 + .vidioc_querycap = rpivid_querycap,
4106 + .vidioc_enum_fmt_vid_cap = rpivid_enum_fmt_vid_cap,
4107 + .vidioc_g_fmt_vid_cap = rpivid_g_fmt_vid_cap,
4108 + .vidioc_try_fmt_vid_cap = rpivid_try_fmt_vid_cap,
4109 + .vidioc_s_fmt_vid_cap = rpivid_s_fmt_vid_cap,
4111 + .vidioc_enum_fmt_vid_out = rpivid_enum_fmt_vid_out,
4112 + .vidioc_g_fmt_vid_out = rpivid_g_fmt_vid_out,
4113 + .vidioc_try_fmt_vid_out = rpivid_try_fmt_vid_out,
4114 + .vidioc_s_fmt_vid_out = rpivid_s_fmt_vid_out,
4116 + .vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs,
4117 + .vidioc_querybuf = v4l2_m2m_ioctl_querybuf,
4118 + .vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
4119 + .vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
4120 + .vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
4121 + .vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs,
4122 + .vidioc_expbuf = v4l2_m2m_ioctl_expbuf,
4124 + .vidioc_streamon = v4l2_m2m_ioctl_streamon,
4125 + .vidioc_streamoff = v4l2_m2m_ioctl_streamoff,
4127 + .vidioc_try_decoder_cmd = v4l2_m2m_ioctl_stateless_try_decoder_cmd,
4128 + .vidioc_decoder_cmd = v4l2_m2m_ioctl_stateless_decoder_cmd,
4130 + .vidioc_subscribe_event = v4l2_ctrl_subscribe_event,
4131 + .vidioc_unsubscribe_event = v4l2_event_unsubscribe,
/* vb2 queue_setup hook. The source or destination format is picked by queue
 * direction; sizes[0] is either checked against that format's sizeimage or
 * set from it.
 * NOTE(review): the condition choosing between "check" and "set", the nplanes
 * handling and the returns are elided in this extract.
 */
4134 +static int rpivid_queue_setup(struct vb2_queue *vq, unsigned int *nbufs,
4135 + unsigned int *nplanes, unsigned int sizes[],
4136 + struct device *alloc_devs[])
4138 + struct rpivid_ctx *ctx = vb2_get_drv_priv(vq);
4139 + struct v4l2_pix_format *pix_fmt;
4141 + if (V4L2_TYPE_IS_OUTPUT(vq->type))
4142 + pix_fmt = &ctx->src_fmt;
4144 + pix_fmt = &ctx->dst_fmt;
4147 + if (sizes[0] < pix_fmt->sizeimage)
4150 + sizes[0] = pix_fmt->sizeimage;
/* Hand buffers of one queue back to vb2 in the given state.
 * A buffer is removed from the m2m source or destination list (by queue
 * direction), the control request attached to it is completed, and the buffer
 * is marked done with 'state'.
 * NOTE(review): the enclosing loop and its termination test are elided here.
 */
4157 +static void rpivid_queue_cleanup(struct vb2_queue *vq, u32 state)
4159 + struct rpivid_ctx *ctx = vb2_get_drv_priv(vq);
4160 + struct vb2_v4l2_buffer *vbuf;
4163 + if (V4L2_TYPE_IS_OUTPUT(vq->type))
4164 + vbuf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
4166 + vbuf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
4171 + v4l2_ctrl_request_complete(vbuf->vb2_buf.req_obj.req,
4173 + v4l2_m2m_buf_done(vbuf, state);
/* vb2 buf_out_validate hook: forces the buffer's field to V4L2_FIELD_NONE */
4177 +static int rpivid_buf_out_validate(struct vb2_buffer *vb)
4179 + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
4181 + vbuf->field = V4L2_FIELD_NONE;
/* vb2 buf_prepare hook. Plane 0 is checked against the sizeimage of the
 * format matching the queue direction, and the plane payload is set to that
 * sizeimage.
 * NOTE(review): the returns are elided in this extract.
 */
4185 +static int rpivid_buf_prepare(struct vb2_buffer *vb)
4187 + struct vb2_queue *vq = vb->vb2_queue;
4188 + struct rpivid_ctx *ctx = vb2_get_drv_priv(vq);
4189 + struct v4l2_pix_format *pix_fmt;
4191 + if (V4L2_TYPE_IS_OUTPUT(vq->type))
4192 + pix_fmt = &ctx->src_fmt;
4194 + pix_fmt = &ctx->dst_fmt;
4196 + if (vb2_plane_size(vb, 0) < pix_fmt->sizeimage)
4199 + vb2_set_plane_payload(vb, 0, pix_fmt->sizeimage);
4204 +static int rpivid_start_streaming(struct vb2_queue *vq, unsigned int count)
4206 + struct rpivid_ctx *ctx = vb2_get_drv_priv(vq);
4207 + struct rpivid_dev *dev = ctx->dev;
4210 + if (ctx->src_fmt.pixelformat != V4L2_PIX_FMT_HEVC_SLICE)
4213 + if (V4L2_TYPE_IS_OUTPUT(vq->type) && dev->dec_ops->start)
4214 + ret = dev->dec_ops->start(ctx);
4216 + ret = clk_set_rate(dev->clock, 500 * 1000 * 1000);
4218 + dev_err(dev->dev, "Failed to set clock rate\n");
4222 + ret = clk_prepare_enable(dev->clock);
4224 + dev_err(dev->dev, "Failed to enable clock\n");
4228 + rpivid_queue_cleanup(vq, VB2_BUF_STATE_QUEUED);
/* vb2 stop_streaming hook. For the source queue the decoder's optional stop
 * op is run; then, for either queue, all remaining buffers are returned in
 * VB2_BUF_STATE_ERROR and the clock is disabled and unprepared.
 */
4233 +static void rpivid_stop_streaming(struct vb2_queue *vq)
4235 + struct rpivid_ctx *ctx = vb2_get_drv_priv(vq);
4236 + struct rpivid_dev *dev = ctx->dev;
4238 + if (V4L2_TYPE_IS_OUTPUT(vq->type) && dev->dec_ops->stop)
4239 + dev->dec_ops->stop(ctx);
4241 + rpivid_queue_cleanup(vq, VB2_BUF_STATE_ERROR);
4243 + clk_disable_unprepare(dev->clock);
/* vb2 buf_queue hook: passes the buffer to the context's m2m queue */
4246 +static void rpivid_buf_queue(struct vb2_buffer *vb)
4248 + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
4249 + struct rpivid_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
4251 + v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf);
/* vb2 buf_request_complete hook: completes the buffer's request against the
 * context's control handler.
 */
4254 +static void rpivid_buf_request_complete(struct vb2_buffer *vb)
4256 + struct rpivid_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
4258 + v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->hdl);
4261 +static struct vb2_ops rpivid_qops = {
4262 + .queue_setup = rpivid_queue_setup,
4263 + .buf_prepare = rpivid_buf_prepare,
4264 + .buf_queue = rpivid_buf_queue,
4265 + .buf_out_validate = rpivid_buf_out_validate,
4266 + .buf_request_complete = rpivid_buf_request_complete,
4267 + .start_streaming = rpivid_start_streaming,
4268 + .stop_streaming = rpivid_stop_streaming,
4269 + .wait_prepare = vb2_ops_wait_prepare,
4270 + .wait_finish = vb2_ops_wait_finish,
/* Configure and initialise the source (OUTPUT) and destination (CAPTURE) vb2
 * queues of a context; priv is the struct rpivid_ctx.
 * Both queues use MMAP and DMABUF I/O, dma-contig memory ops, rpivid_qops,
 * copied timestamps and the device-wide mutex. Only the source queue is
 * marked as supporting and requiring requests.
 * Returns the result of vb2_queue_init() for the destination queue.
 * NOTE(review): the check of the source queue's init result is elided here.
 */
4273 +int rpivid_queue_init(void *priv, struct vb2_queue *src_vq,
4274 + struct vb2_queue *dst_vq)
4276 + struct rpivid_ctx *ctx = priv;
4279 + src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
4280 + src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
4281 + src_vq->drv_priv = ctx;
4282 + src_vq->buf_struct_size = sizeof(struct rpivid_buffer);
4283 + src_vq->min_buffers_needed = 1;
4284 + src_vq->ops = &rpivid_qops;
4285 + src_vq->mem_ops = &vb2_dma_contig_memops;
4286 + src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
4287 + src_vq->lock = &ctx->dev->dev_mutex;
4288 + src_vq->dev = ctx->dev->dev;
4289 + src_vq->supports_requests = true;
4290 + src_vq->requires_requests = true;
4292 + ret = vb2_queue_init(src_vq);
4296 + dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
4297 + dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
4298 + dst_vq->drv_priv = ctx;
4299 + dst_vq->buf_struct_size = sizeof(struct rpivid_buffer);
4300 + dst_vq->min_buffers_needed = 1;
4301 + dst_vq->ops = &rpivid_qops;
4302 + dst_vq->mem_ops = &vb2_dma_contig_memops;
4303 + dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
4304 + dst_vq->lock = &ctx->dev->dev_mutex;
4305 + dst_vq->dev = ctx->dev->dev;
4307 + return vb2_queue_init(dst_vq);
4310 +++ b/drivers/staging/media/rpivid/rpivid_video.h
4312 +/* SPDX-License-Identifier: GPL-2.0 */
4314 + * Raspberry Pi HEVC driver
4316 + * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
4318 + * Based on the Cedrus VPU driver, that is:
4320 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
4321 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
4322 + * Copyright (C) 2018 Bootlin
4325 +#ifndef _RPIVID_VIDEO_H_
4326 +#define _RPIVID_VIDEO_H_
/* Description of a supported format.
 * NOTE(review): only the capabilities member is visible in this extract, and
 * the struct is not referenced by the rpivid_video.c code shown here.
 */
4328 +struct rpivid_format {
4331 + unsigned int capabilities;
/* ioctl dispatch table defined in rpivid_video.c */
4334 +extern const struct v4l2_ioctl_ops rpivid_ioctl_ops;
/* Initialise the source and destination vb2 queues; priv is the context */
4336 +int rpivid_queue_init(void *priv, struct vb2_queue *src_vq,
4337 + struct vb2_queue *dst_vq);
/* Normalise a source / destination pixel format in place; non-zero return
 * means the pixelformat was not accepted.
 */
4338 +int rpivid_prepare_src_format(struct v4l2_pix_format *pix_fmt);
4339 +int rpivid_prepare_dst_format(struct v4l2_pix_format *pix_fmt);