bcm27xx: add support for linux v5.15
[openwrt/staging/wigyori.git] / target / linux / bcm27xx / patches-5.15 / 950-0466-media-rpivid-Make-slice-ctrl-dynamic.patch
1 From 4ab81f113bdf1ca8c3b0d53c777885aa33ed27f3 Mon Sep 17 00:00:00 2001
2 From: John Cox <jc@kynesim.co.uk>
3 Date: Thu, 29 Apr 2021 19:17:06 +0100
4 Subject: [PATCH] media: rpivid: Make slice ctrl dynamic
5
6 Allows the user to submit a whole frame's worth of slice headers in
7 one lump along with a single bitstream dmabuf for the whole lot.
8 This potentially saves a lot of bitstream copying.
9
10 Signed-off-by: John Cox <jc@kynesim.co.uk>
11 ---
12 drivers/staging/media/rpivid/rpivid.c | 4 +
13 drivers/staging/media/rpivid/rpivid_dec.c | 18 ++-
14 drivers/staging/media/rpivid/rpivid_h265.c | 151 +++++++++++----------
15 3 files changed, 99 insertions(+), 74 deletions(-)
16
17 --- a/drivers/staging/media/rpivid/rpivid.c
18 +++ b/drivers/staging/media/rpivid/rpivid.c
19 @@ -63,7 +63,11 @@ static const struct rpivid_control rpivi
20 },
21 {
22 .cfg = {
23 + .name = "Slice param array",
24 .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS,
25 + .type = V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS,
26 + .flags = V4L2_CTRL_FLAG_DYNAMIC_ARRAY,
27 + .dims = { 0x1000 },
28 },
29 .required = true,
30 },
31 --- a/drivers/staging/media/rpivid/rpivid_dec.c
32 +++ b/drivers/staging/media/rpivid/rpivid_dec.c
33 @@ -46,22 +46,34 @@ void rpivid_device_run(void *priv)
34
35 switch (ctx->src_fmt.pixelformat) {
36 case V4L2_PIX_FMT_HEVC_SLICE:
37 + {
38 + const struct v4l2_ctrl *ctrl;
39 +
40 run.h265.sps =
41 rpivid_find_control_data(ctx,
42 V4L2_CID_MPEG_VIDEO_HEVC_SPS);
43 run.h265.pps =
44 rpivid_find_control_data(ctx,
45 V4L2_CID_MPEG_VIDEO_HEVC_PPS);
46 - run.h265.slice_params =
47 - rpivid_find_control_data(ctx,
48 - V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS);
49 run.h265.dec =
50 rpivid_find_control_data(ctx,
51 V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS);
52 +
53 + ctrl = rpivid_find_ctrl(ctx,
54 + V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS);
55 + if (!ctrl || !ctrl->elems) {
56 + v4l2_err(&dev->v4l2_dev, "%s: Missing slice params\n",
57 + __func__);
58 + goto fail;
59 + }
60 + run.h265.slice_ents = ctrl->elems;
61 + run.h265.slice_params = ctrl->p_cur.p;
62 +
63 run.h265.scaling_matrix =
64 rpivid_find_control_data(ctx,
65 V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX);
66 break;
67 + }
68
69 default:
70 break;
71 --- a/drivers/staging/media/rpivid/rpivid_h265.c
72 +++ b/drivers/staging/media/rpivid/rpivid_h265.c
73 @@ -245,7 +245,6 @@ struct rpivid_dec_state {
74
75 // Slice vars
76 unsigned int slice_idx;
77 - bool frame_end;
78 bool slice_temporal_mvp; /* Slice flag but constant for frame */
79
80 // Temp vars per run - don't actually need to persist
81 @@ -740,7 +739,8 @@ static void new_slice_segment(struct rpi
82 V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED))
83 << 24));
84
85 - if ((sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) != 0)
86 + if (!s->start_ts &&
87 + (sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) != 0)
88 write_scaling_factors(de);
89
90 if (!s->dependent_slice_segment_flag) {
91 @@ -1111,7 +1111,8 @@ static int wpp_end_previous_slice(struct
92 * next chunk code simpler
93 */
94 static int wpp_decode_slice(struct rpivid_dec_env *const de,
95 - const struct rpivid_dec_state *const s)
96 + const struct rpivid_dec_state *const s,
97 + bool last_slice)
98 {
99 bool reset_qp_y = true;
100 const bool indep = !s->dependent_slice_segment_flag;
101 @@ -1150,7 +1151,7 @@ static int wpp_decode_slice(struct rpivi
102 0, 0, s->start_ctb_x, s->start_ctb_y,
103 s->slice_qp, slice_reg_const(s));
104
105 - if (s->frame_end) {
106 + if (last_slice) {
107 rv = wpp_entry_fill(de, s, s->ctb_height - 1);
108 if (rv)
109 return rv;
110 @@ -1229,7 +1230,8 @@ static int end_previous_slice(struct rpi
111 }
112
113 static int decode_slice(struct rpivid_dec_env *const de,
114 - const struct rpivid_dec_state *const s)
115 + const struct rpivid_dec_state *const s,
116 + bool last_slice)
117 {
118 bool reset_qp_y;
119 unsigned int tile_x = ctb_to_tile_x(s, s->start_ctb_x);
120 @@ -1275,7 +1277,7 @@ static int decode_slice(struct rpivid_de
121 * now, otherwise this will be done at the start of the next slice
122 * when it will be known where this slice finishes
123 */
124 - if (s->frame_end) {
125 + if (last_slice) {
126 rv = tile_entry_fill(de, s,
127 s->tile_width - 1,
128 s->tile_height - 1);
129 @@ -1670,11 +1672,13 @@ static u32 mk_config2(const struct rpivi
130 static void rpivid_h265_setup(struct rpivid_ctx *ctx, struct rpivid_run *run)
131 {
132 struct rpivid_dev *const dev = ctx->dev;
133 - const struct v4l2_ctrl_hevc_slice_params *const sh =
134 - run->h265.slice_params;
135 const struct v4l2_ctrl_hevc_decode_params *const dec =
136 run->h265.dec;
137 -// const struct v4l2_hevc_pred_weight_table *pred_weight_table;
138 + /* sh0 used where slice header contents should be constant over all
139 + * slices, or first slice of frame
140 + */
141 + const struct v4l2_ctrl_hevc_slice_params *const sh0 =
142 + run->h265.slice_params;
143 struct rpivid_q_aux *dpb_q_aux[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
144 struct rpivid_dec_state *const s = ctx->state;
145 struct vb2_queue *vq;
146 @@ -1684,20 +1688,18 @@ static void rpivid_h265_setup(struct rpi
147 int use_aux;
148 int rv;
149 bool slice_temporal_mvp;
150 + bool frame_end;
151
152 xtrace_in(dev, de);
153 + s->sh = NULL; // Avoid use until in the slice loop
154
155 -// pred_weight_table = &sh->pred_weight_table;
156 -
157 - s->frame_end =
158 + frame_end =
159 ((run->src->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF) == 0);
160
161 - slice_temporal_mvp = (sh->flags &
162 + slice_temporal_mvp = (sh0->flags &
163 V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED);
164
165 if (de && de->state != RPIVID_DECODE_END) {
166 - ++s->slice_idx;
167 -
168 switch (de->state) {
169 case RPIVID_DECODE_SLICE_CONTINUE:
170 // Expected state
171 @@ -1830,7 +1832,7 @@ static void rpivid_h265_setup(struct rpi
172 de->rpi_config2 = mk_config2(s);
173 de->rpi_framesize = (s->sps.pic_height_in_luma_samples << 16) |
174 s->sps.pic_width_in_luma_samples;
175 - de->rpi_currpoc = sh->slice_pic_order_cnt;
176 + de->rpi_currpoc = sh0->slice_pic_order_cnt;
177
178 if (s->sps.flags &
179 V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) {
180 @@ -1839,17 +1841,17 @@ static void rpivid_h265_setup(struct rpi
181
182 s->slice_idx = 0;
183
184 - if (sh->slice_segment_addr != 0) {
185 + if (sh0->slice_segment_addr != 0) {
186 v4l2_warn(&dev->v4l2_dev,
187 "New frame but segment_addr=%d\n",
188 - sh->slice_segment_addr);
189 + sh0->slice_segment_addr);
190 goto fail;
191 }
192
193 /* Allocate a bitbuf if we need one - don't need one if single
194 * slice as we can use the src buf directly
195 */
196 - if (!s->frame_end && !de->bit_copy_gptr->ptr) {
197 + if (!frame_end && !de->bit_copy_gptr->ptr) {
198 size_t bits_alloc;
199 bits_alloc = rpivid_bit_buf_size(s->sps.pic_width_in_luma_samples,
200 s->sps.pic_height_in_luma_samples,
201 @@ -1873,21 +1875,7 @@ static void rpivid_h265_setup(struct rpi
202 s->src_addr = 0;
203 s->src_buf = NULL;
204
205 - if (run->src->planes[0].bytesused < (sh->bit_size + 7) / 8) {
206 - v4l2_warn(&dev->v4l2_dev,
207 - "Bit size %d > bytesused %d\n",
208 - sh->bit_size, run->src->planes[0].bytesused);
209 - goto fail;
210 - }
211 - if (sh->data_bit_offset >= sh->bit_size ||
212 - sh->bit_size - sh->data_bit_offset < 8) {
213 - v4l2_warn(&dev->v4l2_dev,
214 - "Bit size %d < Bit offset %d + 8\n",
215 - sh->bit_size, sh->data_bit_offset);
216 - goto fail;
217 - }
218 -
219 - if (s->frame_end)
220 + if (frame_end)
221 s->src_addr = vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf,
222 0);
223 if (!s->src_addr)
224 @@ -1898,44 +1886,65 @@ static void rpivid_h265_setup(struct rpi
225 }
226
227 // Pre calc a few things
228 - s->sh = sh;
229 s->dec = dec;
230 - s->slice_qp = 26 + s->pps.init_qp_minus26 + s->sh->slice_qp_delta;
231 - s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ?
232 + for (i = 0; i != run->h265.slice_ents; ++i) {
233 + const struct v4l2_ctrl_hevc_slice_params *const sh = sh0 + i;
234 + const bool last_slice = frame_end && i + 1 == run->h265.slice_ents;
235 +
236 + s->sh = sh;
237 +
238 + if (run->src->planes[0].bytesused < (sh->bit_size + 7) / 8) {
239 + v4l2_warn(&dev->v4l2_dev,
240 + "Bit size %d > bytesused %d\n",
241 + sh->bit_size, run->src->planes[0].bytesused);
242 + goto fail;
243 + }
244 + if (sh->data_bit_offset >= sh->bit_size ||
245 + sh->bit_size - sh->data_bit_offset < 8) {
246 + v4l2_warn(&dev->v4l2_dev,
247 + "Bit size %d < Bit offset %d + 8\n",
248 + sh->bit_size, sh->data_bit_offset);
249 + goto fail;
250 + }
251 +
252 + s->slice_qp = 26 + s->pps.init_qp_minus26 + sh->slice_qp_delta;
253 + s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ?
254 + 0 :
255 + (5 - sh->five_minus_max_num_merge_cand);
256 + s->dependent_slice_segment_flag =
257 + ((sh->flags &
258 + V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT) != 0);
259 +
260 + s->nb_refs[0] = (sh->slice_type == HEVC_SLICE_I) ?
261 + 0 :
262 + sh->num_ref_idx_l0_active_minus1 + 1;
263 + s->nb_refs[1] = (sh->slice_type != HEVC_SLICE_B) ?
264 0 :
265 - (5 - sh->five_minus_max_num_merge_cand);
266 - // * SH DSS flag invented by me - but clearly needed
267 - s->dependent_slice_segment_flag =
268 - ((sh->flags &
269 - V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT) != 0);
270 -
271 - s->nb_refs[0] = (sh->slice_type == HEVC_SLICE_I) ?
272 - 0 :
273 - sh->num_ref_idx_l0_active_minus1 + 1;
274 - s->nb_refs[1] = (sh->slice_type != HEVC_SLICE_B) ?
275 - 0 :
276 - sh->num_ref_idx_l1_active_minus1 + 1;
277 -
278 - if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED)
279 - populate_scaling_factors(run, de, s);
280 -
281 - // Calc all the random coord info to avoid repeated conversion in/out
282 - s->start_ts = s->ctb_addr_rs_to_ts[sh->slice_segment_addr];
283 - s->start_ctb_x = sh->slice_segment_addr % de->pic_width_in_ctbs_y;
284 - s->start_ctb_y = sh->slice_segment_addr / de->pic_width_in_ctbs_y;
285 - // Last CTB of previous slice
286 - prev_rs = !s->start_ts ? 0 : s->ctb_addr_ts_to_rs[s->start_ts - 1];
287 - s->prev_ctb_x = prev_rs % de->pic_width_in_ctbs_y;
288 - s->prev_ctb_y = prev_rs / de->pic_width_in_ctbs_y;
289 + sh->num_ref_idx_l1_active_minus1 + 1;
290
291 - if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED))
292 - rv = wpp_decode_slice(de, s);
293 - else
294 - rv = decode_slice(de, s);
295 - if (rv)
296 - goto fail;
297 + if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED)
298 + populate_scaling_factors(run, de, s);
299 +
300 + /* Calc all the random coord info to avoid repeated conversion in/out */
301 + s->start_ts = s->ctb_addr_rs_to_ts[sh->slice_segment_addr];
302 + s->start_ctb_x = sh->slice_segment_addr % de->pic_width_in_ctbs_y;
303 + s->start_ctb_y = sh->slice_segment_addr / de->pic_width_in_ctbs_y;
304 + /* Last CTB of previous slice */
305 + prev_rs = !s->start_ts ? 0 : s->ctb_addr_ts_to_rs[s->start_ts - 1];
306 + s->prev_ctb_x = prev_rs % de->pic_width_in_ctbs_y;
307 + s->prev_ctb_y = prev_rs / de->pic_width_in_ctbs_y;
308 +
309 + if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED))
310 + rv = wpp_decode_slice(de, s, last_slice);
311 + else
312 + rv = decode_slice(de, s, last_slice);
313 + if (rv)
314 + goto fail;
315 +
316 + ++s->slice_idx;
317 + }
318
319 - if (!s->frame_end) {
320 + if (!frame_end) {
321 xtrace_ok(dev, de);
322 return;
323 }
324 @@ -2054,8 +2063,8 @@ static void rpivid_h265_setup(struct rpi
325 fail:
326 if (de)
327 // Actual error reporting happens in Trigger
328 - de->state = s->frame_end ? RPIVID_DECODE_ERROR_DONE :
329 - RPIVID_DECODE_ERROR_CONTINUE;
330 + de->state = frame_end ? RPIVID_DECODE_ERROR_DONE :
331 + RPIVID_DECODE_ERROR_CONTINUE;
332 xtrace_fail(dev, de);
333 }
334